Merge "Use GCC on ARM64"
diff --git a/Android.mk b/Android.mk
index f7f65ac..593ee04 100644
--- a/Android.mk
+++ b/Android.mk
@@ -189,6 +189,13 @@
 test-art-host-oat: test-art-host-oat-default test-art-host-oat-interpreter
 	@echo test-art-host-oat PASSED
 
+FAILING_OPTIMIZING_MESSAGE := failed with the optimizing compiler. If the test passes \
+  with Quick and interpreter, it is probably just a bug in the optimizing compiler. Please \
+  add the test name to the FAILING_OPTIMIZING_TESTS Makefile variable in art/Android.mk, \
+  and file a bug.
+
+# Tests known to fail with the optimizing compiler are listed in FAILING_OPTIMIZING_TESTS below.
+
 define declare-test-art-host-run-test
 .PHONY: test-art-host-run-test-default-$(1)
 test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
@@ -197,6 +204,14 @@
 
 TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1)
 
+.PHONY: test-art-host-run-test-optimizing-$(1)
+test-art-host-run-test-optimizing-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test -Xcompiler-option --compiler-backend=Optimizing $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host $(1) \
+	|| (echo -e "\x1b[31;01mTest $(1) $(FAILING_OPTIMIZING_MESSAGE)\x1b[0m" && exit 1)
+	@echo test-art-host-run-test-optimizing-$(1) PASSED
+
+TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS += test-art-host-run-test-optimizing-$(1)
+
 .PHONY: test-art-host-run-test-interpreter-$(1)
 test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
 	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host --interpreter $(1)
@@ -205,7 +220,7 @@
 TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1)
 
 .PHONY: test-art-host-run-test-$(1)
-test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1)
+test-art-host-run-test-$(1): test-art-host-run-test-default-$(1) test-art-host-run-test-interpreter-$(1) test-art-host-run-test-optimizing-$(1)
 
 endef
 
@@ -215,12 +230,21 @@
 test-art-host-run-test-default: $(TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS)
 	@echo test-art-host-run-test-default PASSED
 
+# Tests known to fail with the optimizing compiler; their targets are removed from the optimizing run.
+FAILING_OPTIMIZING_TESTS :=
+TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS := $(filter-out $(addprefix test-art-host-run-test-optimizing-,$(FAILING_OPTIMIZING_TESTS)),$(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS))
+
+.PHONY: test-art-host-run-test-optimizing
+test-art-host-run-test-optimizing: $(TEST_ART_HOST_RUN_TEST_OPTIMIZING_TARGETS)
+	$(foreach test, $(FAILING_OPTIMIZING_TESTS), $(info Optimizing compiler has skipped $(test)))
+	@echo test-art-host-run-test-optimizing PASSED
+
 .PHONY: test-art-host-run-test-interpreter
 test-art-host-run-test-interpreter: $(TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS)
 	@echo test-art-host-run-test-interpreter PASSED
 
 .PHONY: test-art-host-run-test
-test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter
+test-art-host-run-test: test-art-host-run-test-default test-art-host-run-test-interpreter test-art-host-run-test-optimizing
 	@echo test-art-host-run-test PASSED
 
 ########################################################################
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index fbb7eb3..c67a815 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -42,6 +42,11 @@
 
 $(HOST_CORE_OAT_OUT): $(HOST_CORE_IMG_OUT)
 
+IMPLICIT_CHECKS_arm := null,stack
+IMPLICIT_CHECKS_arm64 := none
+IMPLICIT_CHECKS_x86 := none
+IMPLICIT_CHECKS_x86_64 := none
+IMPLICIT_CHECKS_mips := none
 define create-oat-target-targets
 $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "target dex2oat: $$@ ($$?)"
@@ -49,6 +54,7 @@
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$$(PRELOADED_CLASSES) $$(addprefix \
 		--dex-file=,$$(TARGET_CORE_DEX_FILES)) $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \
 		--oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) \
+		--implicit-checks=$(IMPLICIT_CHECKS_$($(1)TARGET_ARCH)) \
 		--instruction-set=$$($(1)TARGET_ARCH) --instruction-set-features=$$(TARGET_INSTRUCTION_SET_FEATURES) --android-root=$$(PRODUCT_OUT)/system
 
 # This "renaming" eases declaration in art/Android.mk
@@ -58,7 +64,7 @@
 endef
 
 ifdef TARGET_2ND_ARCH
-$(eval $(call create-oat-target-targets,2ND_))
+  $(eval $(call create-oat-target-targets,2ND_))
 endif
 $(eval $(call create-oat-target-targets,))
 
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 7441dac..f098a34 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -86,7 +86,11 @@
 }
 
 size_t CompiledCode::CodeDelta() const {
-  switch (instruction_set_) {
+  return CodeDelta(instruction_set_);
+}
+
+size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
+  switch (instruction_set) {
     case kArm:
     case kArm64:
     case kMips:
@@ -98,7 +102,7 @@
       return 1;
     }
     default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set_;
+      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return 0;
   }
 }
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 23cd250..b8cd851 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -67,6 +67,7 @@
   // returns the difference between the code address and a usable PC.
   // mainly to cope with kThumb2 where the lower bit must be set.
   size_t CodeDelta() const;
+  static size_t CodeDelta(InstructionSet instruction_set);  // Same, for a given instruction set.
 
   // Returns a pointer suitable for invoking the code at the argument
   // code_pointer address.  Mainly to cope with kThumb2 where the
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index b8d190a..0845656 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -134,9 +134,6 @@
   }
 }
 
-// Enable opcodes that mostly work, but produce assertion errors (thus breaking libartd.so).
-#define ARM64_USE_EXPERIMENTAL_OPCODES 0
-
 // TODO: Remove this when we are able to compile everything.
 int arm64_support_list[] = {
     Instruction::NOP,
@@ -149,6 +146,9 @@
     Instruction::MOVE_OBJECT,
     Instruction::MOVE_OBJECT_FROM16,
     Instruction::MOVE_OBJECT_16,
+    Instruction::MOVE_RESULT,
+    Instruction::MOVE_RESULT_WIDE,
+    Instruction::MOVE_RESULT_OBJECT,
     Instruction::MOVE_EXCEPTION,
     Instruction::RETURN_VOID,
     Instruction::RETURN,
@@ -163,9 +163,19 @@
     Instruction::CONST_WIDE,
     Instruction::CONST_WIDE_HIGH16,
     Instruction::CONST_STRING,
+    Instruction::CONST_STRING_JUMBO,
+    Instruction::CONST_CLASS,
     Instruction::MONITOR_ENTER,
     Instruction::MONITOR_EXIT,
-    // Instruction::THROW,
+    Instruction::CHECK_CAST,
+    Instruction::INSTANCE_OF,
+    Instruction::ARRAY_LENGTH,
+    Instruction::NEW_INSTANCE,
+    Instruction::NEW_ARRAY,
+    Instruction::FILLED_NEW_ARRAY,
+    Instruction::FILLED_NEW_ARRAY_RANGE,
+    Instruction::FILL_ARRAY_DATA,
+    Instruction::THROW,
     Instruction::GOTO,
     Instruction::GOTO_16,
     Instruction::GOTO_32,
@@ -188,9 +198,85 @@
     Instruction::IF_GEZ,
     Instruction::IF_GTZ,
     Instruction::IF_LEZ,
+    Instruction::UNUSED_3E,
+    Instruction::UNUSED_3F,
+    Instruction::UNUSED_40,
+    Instruction::UNUSED_41,
+    Instruction::UNUSED_42,
+    Instruction::UNUSED_43,
+    Instruction::AGET,
+    Instruction::AGET_WIDE,
+    Instruction::AGET_OBJECT,
+    Instruction::AGET_BOOLEAN,
+    Instruction::AGET_BYTE,
+    Instruction::AGET_CHAR,
+    Instruction::AGET_SHORT,
+    Instruction::APUT,
+    Instruction::APUT_WIDE,
+    Instruction::APUT_OBJECT,
+    Instruction::APUT_BOOLEAN,
+    Instruction::APUT_BYTE,
+    Instruction::APUT_CHAR,
+    Instruction::APUT_SHORT,
+    Instruction::IGET,
+    Instruction::IGET_WIDE,
+    Instruction::IGET_OBJECT,
+    Instruction::IGET_BOOLEAN,
+    Instruction::IGET_BYTE,
+    Instruction::IGET_CHAR,
+    Instruction::IGET_SHORT,
+    Instruction::IPUT,
+    Instruction::IPUT_WIDE,
+    Instruction::IPUT_OBJECT,
+    Instruction::IPUT_BOOLEAN,
+    Instruction::IPUT_BYTE,
+    Instruction::IPUT_CHAR,
+    Instruction::IPUT_SHORT,
+    Instruction::SGET,
+    Instruction::SGET_WIDE,
+    Instruction::SGET_OBJECT,
+    Instruction::SGET_BOOLEAN,
+    Instruction::SGET_BYTE,
+    Instruction::SGET_CHAR,
+    Instruction::SGET_SHORT,
+    Instruction::SPUT,
+    Instruction::SPUT_WIDE,
+    Instruction::SPUT_OBJECT,
+    Instruction::SPUT_BOOLEAN,
+    Instruction::SPUT_BYTE,
+    Instruction::SPUT_CHAR,
+    Instruction::SPUT_SHORT,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    Instruction::RETURN_VOID_BARRIER,
+    Instruction::INVOKE_VIRTUAL_RANGE,
+    Instruction::INVOKE_SUPER_RANGE,
+    Instruction::INVOKE_DIRECT_RANGE,
+    Instruction::INVOKE_STATIC_RANGE,
+    Instruction::INVOKE_INTERFACE_RANGE,
+    Instruction::UNUSED_79,
+    Instruction::UNUSED_7A,
     Instruction::NEG_INT,
     Instruction::NOT_INT,
+    Instruction::NEG_LONG,
+    Instruction::NOT_LONG,
     Instruction::NEG_FLOAT,
+    Instruction::NEG_DOUBLE,
+    Instruction::INT_TO_LONG,
+    Instruction::INT_TO_FLOAT,
+    Instruction::INT_TO_DOUBLE,
+    Instruction::LONG_TO_INT,
+    Instruction::LONG_TO_FLOAT,
+    Instruction::LONG_TO_DOUBLE,
+    Instruction::FLOAT_TO_INT,
+    Instruction::FLOAT_TO_LONG,
+    Instruction::FLOAT_TO_DOUBLE,
+    Instruction::DOUBLE_TO_INT,
+    Instruction::DOUBLE_TO_LONG,
+    Instruction::DOUBLE_TO_FLOAT,
     Instruction::INT_TO_BYTE,
     Instruction::INT_TO_CHAR,
     Instruction::INT_TO_SHORT,
@@ -205,10 +291,27 @@
     Instruction::SHL_INT,
     Instruction::SHR_INT,
     Instruction::USHR_INT,
+    Instruction::ADD_LONG,
+    Instruction::SUB_LONG,
+    Instruction::MUL_LONG,
+    Instruction::DIV_LONG,
+    Instruction::REM_LONG,
+    Instruction::AND_LONG,
+    Instruction::OR_LONG,
+    Instruction::XOR_LONG,
+    Instruction::SHL_LONG,
+    Instruction::SHR_LONG,
+    Instruction::USHR_LONG,
     Instruction::ADD_FLOAT,
     Instruction::SUB_FLOAT,
     Instruction::MUL_FLOAT,
     Instruction::DIV_FLOAT,
+    Instruction::REM_FLOAT,
+    Instruction::ADD_DOUBLE,
+    Instruction::SUB_DOUBLE,
+    Instruction::MUL_DOUBLE,
+    Instruction::DIV_DOUBLE,
+    Instruction::REM_DOUBLE,
     Instruction::ADD_INT_2ADDR,
     Instruction::SUB_INT_2ADDR,
     Instruction::MUL_INT_2ADDR,
@@ -220,10 +323,27 @@
     Instruction::SHL_INT_2ADDR,
     Instruction::SHR_INT_2ADDR,
     Instruction::USHR_INT_2ADDR,
+    Instruction::ADD_LONG_2ADDR,
+    Instruction::SUB_LONG_2ADDR,
+    Instruction::MUL_LONG_2ADDR,
+    Instruction::DIV_LONG_2ADDR,
+    Instruction::REM_LONG_2ADDR,
+    Instruction::AND_LONG_2ADDR,
+    Instruction::OR_LONG_2ADDR,
+    Instruction::XOR_LONG_2ADDR,
+    Instruction::SHL_LONG_2ADDR,
+    Instruction::SHR_LONG_2ADDR,
+    Instruction::USHR_LONG_2ADDR,
     Instruction::ADD_FLOAT_2ADDR,
     Instruction::SUB_FLOAT_2ADDR,
     Instruction::MUL_FLOAT_2ADDR,
     Instruction::DIV_FLOAT_2ADDR,
+    Instruction::REM_FLOAT_2ADDR,
+    Instruction::ADD_DOUBLE_2ADDR,
+    Instruction::SUB_DOUBLE_2ADDR,
+    Instruction::MUL_DOUBLE_2ADDR,
+    Instruction::DIV_DOUBLE_2ADDR,
+    Instruction::REM_DOUBLE_2ADDR,
     Instruction::ADD_INT_LIT16,
     Instruction::RSUB_INT,
     Instruction::MUL_INT_LIT16,
@@ -243,82 +363,35 @@
     Instruction::SHL_INT_LIT8,
     Instruction::SHR_INT_LIT8,
     Instruction::USHR_INT_LIT8,
-    Instruction::SGET,
-    Instruction::SGET_BOOLEAN,
-    Instruction::SGET_BYTE,
-    Instruction::SGET_CHAR,
-    Instruction::SGET_SHORT,
-    Instruction::SGET_OBJECT,
-    Instruction::SPUT,
-    Instruction::SPUT_OBJECT,
-    Instruction::SPUT_BOOLEAN,
-    Instruction::SPUT_BYTE,
-    Instruction::SPUT_CHAR,
-    Instruction::SPUT_SHORT,
-    Instruction::CMPL_FLOAT,
-    Instruction::CMPG_FLOAT,
-    Instruction::IGET,
-    Instruction::IGET_OBJECT,
-    Instruction::IGET_BOOLEAN,
-    Instruction::IGET_BYTE,
-    Instruction::IGET_CHAR,
-    Instruction::IGET_SHORT,
-    Instruction::IPUT,
-    Instruction::IPUT_OBJECT,
-    Instruction::IPUT_BOOLEAN,
-    Instruction::IPUT_BYTE,
-    Instruction::IPUT_CHAR,
-    Instruction::IPUT_SHORT,
-    Instruction::NEG_LONG,
-    Instruction::NOT_LONG,
-    Instruction::NEG_DOUBLE,
-    Instruction::INT_TO_LONG,
-    Instruction::INT_TO_FLOAT,
-    Instruction::INT_TO_DOUBLE,
-    Instruction::LONG_TO_INT,
-    Instruction::LONG_TO_FLOAT,
-    Instruction::LONG_TO_DOUBLE,
-    Instruction::FLOAT_TO_INT,
-    Instruction::FLOAT_TO_LONG,
-    Instruction::FLOAT_TO_DOUBLE,
-    Instruction::DOUBLE_TO_INT,
-    Instruction::DOUBLE_TO_LONG,
-    Instruction::DOUBLE_TO_FLOAT,
-    Instruction::ADD_LONG,
-    Instruction::SUB_LONG,
-    Instruction::MUL_LONG,
-    Instruction::DIV_LONG,
-    Instruction::REM_LONG,
-    Instruction::AND_LONG,
-    Instruction::OR_LONG,
-    Instruction::XOR_LONG,
-    Instruction::SHL_LONG,
-    Instruction::SHR_LONG,
-    Instruction::USHR_LONG,
-    // Instruction::REM_FLOAT,
-    Instruction::ADD_DOUBLE,
-    Instruction::SUB_DOUBLE,
-    Instruction::MUL_DOUBLE,
-    Instruction::DIV_DOUBLE,
-    // Instruction::REM_DOUBLE,
-    Instruction::ADD_LONG_2ADDR,
-    Instruction::SUB_LONG_2ADDR,
-    Instruction::MUL_LONG_2ADDR,
-    Instruction::DIV_LONG_2ADDR,
-    Instruction::REM_LONG_2ADDR,
-    Instruction::AND_LONG_2ADDR,
-    Instruction::OR_LONG_2ADDR,
-    Instruction::XOR_LONG_2ADDR,
-    Instruction::SHL_LONG_2ADDR,
-    Instruction::SHR_LONG_2ADDR,
-    Instruction::USHR_LONG_2ADDR,
-    // Instruction::REM_FLOAT_2ADDR,
-    Instruction::ADD_DOUBLE_2ADDR,
-    Instruction::SUB_DOUBLE_2ADDR,
-    Instruction::MUL_DOUBLE_2ADDR,
-    Instruction::DIV_DOUBLE_2ADDR,
-    // Instruction::REM_DOUBLE_2ADDR,
-    // TODO(Arm64): Enable compiler pass
+    Instruction::IGET_QUICK,
+    Instruction::IGET_WIDE_QUICK,
+    Instruction::IGET_OBJECT_QUICK,
+    Instruction::IPUT_QUICK,
+    Instruction::IPUT_WIDE_QUICK,
+    Instruction::IPUT_OBJECT_QUICK,
+    Instruction::INVOKE_VIRTUAL_QUICK,
+    Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+    Instruction::UNUSED_EB,
+    Instruction::UNUSED_EC,
+    Instruction::UNUSED_ED,
+    Instruction::UNUSED_EE,
+    Instruction::UNUSED_EF,
+    Instruction::UNUSED_F0,
+    Instruction::UNUSED_F1,
+    Instruction::UNUSED_F2,
+    Instruction::UNUSED_F3,
+    Instruction::UNUSED_F4,
+    Instruction::UNUSED_F5,
+    Instruction::UNUSED_F6,
+    Instruction::UNUSED_F7,
+    Instruction::UNUSED_F8,
+    Instruction::UNUSED_F9,
+    Instruction::UNUSED_FA,
+    Instruction::UNUSED_FB,
+    Instruction::UNUSED_FC,
+    Instruction::UNUSED_FD,
+    Instruction::UNUSED_FE,
+    Instruction::UNUSED_FF,
     // ----- ExtendedMIROpcode -----
     kMirOpPhi,
     kMirOpCopy,
@@ -334,88 +407,6 @@
     kMirOpCheck,
     kMirOpCheckPart2,
     kMirOpSelect,
-
-#if ARM64_USE_EXPERIMENTAL_OPCODES
-    Instruction::MOVE_RESULT,
-    Instruction::MOVE_RESULT_WIDE,
-    Instruction::MOVE_RESULT_OBJECT,
-    Instruction::CONST_STRING_JUMBO,
-    Instruction::CONST_CLASS,
-    Instruction::CHECK_CAST,
-    Instruction::INSTANCE_OF,
-    Instruction::ARRAY_LENGTH,
-    Instruction::NEW_INSTANCE,
-    Instruction::NEW_ARRAY,
-    Instruction::FILLED_NEW_ARRAY,
-    Instruction::FILLED_NEW_ARRAY_RANGE,
-    Instruction::FILL_ARRAY_DATA,
-    // Instruction::UNUSED_3E,
-    // Instruction::UNUSED_3F,
-    // Instruction::UNUSED_40,
-    // Instruction::UNUSED_41,
-    // Instruction::UNUSED_42,
-    // Instruction::UNUSED_43,
-    Instruction::AGET,
-    Instruction::AGET_WIDE,
-    Instruction::AGET_OBJECT,
-    Instruction::AGET_BOOLEAN,
-    Instruction::AGET_BYTE,
-    Instruction::AGET_CHAR,
-    Instruction::AGET_SHORT,
-    Instruction::APUT,
-    Instruction::APUT_WIDE,
-    Instruction::APUT_OBJECT,
-    Instruction::APUT_BOOLEAN,
-    Instruction::APUT_BYTE,
-    Instruction::APUT_CHAR,
-    Instruction::APUT_SHORT,
-    Instruction::IPUT_WIDE,
-    Instruction::IGET_WIDE,
-    Instruction::SGET_WIDE,
-    Instruction::SPUT_WIDE,
-    Instruction::INVOKE_VIRTUAL,
-    Instruction::INVOKE_SUPER,
-    Instruction::INVOKE_DIRECT,
-    Instruction::INVOKE_STATIC,
-    Instruction::INVOKE_INTERFACE,
-    Instruction::RETURN_VOID_BARRIER,
-    Instruction::INVOKE_VIRTUAL_RANGE,
-    Instruction::INVOKE_SUPER_RANGE,
-    Instruction::INVOKE_DIRECT_RANGE,
-    Instruction::INVOKE_STATIC_RANGE,
-    Instruction::INVOKE_INTERFACE_RANGE,
-    // Instruction::UNUSED_79,
-    // Instruction::UNUSED_7A,
-    // Instruction::IGET_QUICK,
-    // Instruction::IGET_WIDE_QUICK,
-    // Instruction::IGET_OBJECT_QUICK,
-    // Instruction::IPUT_QUICK,
-    // Instruction::IPUT_WIDE_QUICK,
-    // Instruction::IPUT_OBJECT_QUICK,
-    // Instruction::INVOKE_VIRTUAL_QUICK,
-    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
-    // Instruction::UNUSED_EB,
-    // Instruction::UNUSED_EC,
-    // Instruction::UNUSED_ED,
-    // Instruction::UNUSED_EE,
-    // Instruction::UNUSED_EF,
-    // Instruction::UNUSED_F0,
-    // Instruction::UNUSED_F1,
-    // Instruction::UNUSED_F2,
-    // Instruction::UNUSED_F3,
-    // Instruction::UNUSED_F4,
-    // Instruction::UNUSED_F5,
-    // Instruction::UNUSED_F6,
-    // Instruction::UNUSED_F7,
-    // Instruction::UNUSED_F8,
-    // Instruction::UNUSED_F9,
-    // Instruction::UNUSED_FA,
-    // Instruction::UNUSED_FB,
-    // Instruction::UNUSED_FC,
-    // Instruction::UNUSED_FD,
-    // Instruction::UNUSED_FE,
-    // Instruction::UNUSED_FF,
-#endif /* ARM64_USE_EXPERIMENTAL_OPCODES */
 };
 
 // TODO: Remove this when we are able to compile everything.
@@ -792,10 +783,11 @@
                                      uint16_t class_def_idx, uint32_t method_idx,
                                      jobject class_loader, const DexFile& dex_file,
                                      void* llvm_compilation_unit) {
-  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  std::string method_name = PrettyMethod(method_idx, dex_file);
+  VLOG(compiler) << "Compiling " << method_name << "...";
   if (code_item->insns_size_in_code_units_ >= 0x10000) {
     LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
-              << " in " << PrettyMethod(method_idx, dex_file);
+              << " in " << method_name;
     return NULL;
   }
 
@@ -827,8 +819,7 @@
   cu.compiler_flip_match = false;
   bool use_match = !cu.compiler_method_match.empty();
   bool match = use_match && (cu.compiler_flip_match ^
-      (PrettyMethod(method_idx, dex_file).find(cu.compiler_method_match) !=
-       std::string::npos));
+      (method_name.find(cu.compiler_method_match) != std::string::npos));
   if (!use_match || match) {
     cu.disable_opt = kCompilerOptimizerDisableFlags;
     cu.enable_debug = kCompilerDebugFlags;
@@ -839,7 +830,7 @@
   if (gVerboseMethods.size() != 0) {
     cu.verbose = false;
     for (size_t i = 0; i < gVerboseMethods.size(); ++i) {
-      if (PrettyMethod(method_idx, dex_file).find(gVerboseMethods[i])
+      if (method_name.find(gVerboseMethods[i])
           != std::string::npos) {
         cu.verbose = true;
         break;
@@ -873,7 +864,9 @@
         (1 << kPromoteCompilerTemps));
   } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
-    cu.disable_opt = ~(uint32_t)0;
+    cu.disable_opt |= (
+        (1 << kLoadStoreElimination) |
+        (1 << kPromoteRegs));
   } else if (cu.instruction_set == kArm64) {
     // TODO(Arm64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
@@ -894,27 +887,21 @@
     cu.mir_graph->EnableOpcodeCounting();
   }
 
-  // Check early if we should skip this compilation if the profiler is enabled.
-  if (cu.compiler_driver->ProfilePresent()) {
-    std::string methodname = PrettyMethod(method_idx, dex_file);
-    if (cu.mir_graph->SkipCompilation(methodname)) {
-      return NULL;
-    }
-  }
-
   /* Build the raw MIR graph */
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
   // TODO(Arm64): Remove this when we are able to compile everything.
   if (!CanCompileMethod(method_idx, dex_file, cu)) {
-    VLOG(compiler) << "Cannot compile method : " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : " << method_name;
     return nullptr;
   }
 
   cu.NewTimingSplit("MIROpt:CheckFilters");
-  if (cu.mir_graph->SkipCompilation()) {
-    VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file);
+  std::string skip_message;
+  if (cu.mir_graph->SkipCompilation(&skip_message)) {
+    VLOG(compiler) << cu.instruction_set << ": Skipping method : "
+                   << method_name << "  Reason = " << skip_message;
     return nullptr;
   }
 
@@ -922,6 +909,13 @@
   PassDriverMEOpts pass_driver(&cu);
   pass_driver.Launch();
 
+  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
+  if (cu.compiler_driver->ProfilePresent()
+      && !cu.mir_graph->MethodIsLeaf()
+      && cu.mir_graph->SkipCompilationByName(method_name)) {
+    return nullptr;
+  }
+
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
@@ -937,7 +931,7 @@
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena_stack.PeakBytesAllocated() > 256 * 1024) {
       MemStats stack_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(stack_stats);
     }
   }
   cu.arena_stack.Reset();
@@ -945,7 +939,8 @@
   CompiledMethod* result = NULL;
 
   if (cu.mir_graph->PuntToInterpreter()) {
-    return NULL;
+    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: " << method_name;
+    return nullptr;
   }
 
   cu.cg->Materialize();
@@ -955,21 +950,21 @@
   cu.NewTimingSplit("Cleanup");
 
   if (result) {
-    VLOG(compiler) << "Compiled " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Compiled " << method_name;
   } else {
-    VLOG(compiler) << "Deferred " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Deferred " << method_name;
   }
 
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) {
       MemStats mem_stats(cu.arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
   if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
     LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
-              << " " << PrettyMethod(method_idx, dex_file);
+              << " " << method_name;
   }
 
   cu.EndTiming();
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 1350665..e372206 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -941,7 +941,8 @@
   }
 }
 
-bool MIRGraph::ComputeSkipCompilation(MethodStats* stats, bool skip_default) {
+bool MIRGraph::ComputeSkipCompilation(MethodStats* stats, bool skip_default,
+                                      std::string* skip_message) {
   float count = stats->dex_instructions;
   stats->math_ratio = stats->math_ops / count;
   stats->fp_ratio = stats->fp_ops / count;
@@ -994,6 +995,8 @@
   // If significant in size and high proportion of expensive operations, skip.
   if (cu_->compiler_driver->GetCompilerOptions().IsSmallMethod(GetNumDalvikInsns()) &&
       (stats->heavyweight_ratio > 0.3)) {
+    *skip_message = "Is a small method with heavyweight ratio " +
+                    std::to_string(stats->heavyweight_ratio);
     return true;
   }
 
@@ -1003,7 +1006,7 @@
  /*
   * Will eventually want this to be a bit more sophisticated and happen at verification time.
   */
-bool MIRGraph::SkipCompilation() {
+bool MIRGraph::SkipCompilation(std::string* skip_message) {
   const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
   CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
   if (compiler_filter == CompilerOptions::kEverything) {
@@ -1012,10 +1015,12 @@
 
   // Contains a pattern we don't want to compile?
   if (PuntToInterpreter()) {
+    *skip_message = "Punt to interpreter set";
     return true;
   }
 
   if (!compiler_options.IsCompilationEnabled()) {
+    *skip_message = "Compilation disabled";
     return true;
   }
 
@@ -1041,6 +1046,9 @@
 
   // If size < cutoff, assume we'll compile - but allow removal.
   bool skip_compilation = (GetNumDalvikInsns() >= default_cutoff);
+  if (skip_compilation) {
+    *skip_message = "#Insns >= default_cutoff: " + std::to_string(GetNumDalvikInsns());
+  }
 
   /*
    * Filter 1: Huge methods are likely to be machine generated, but some aren't.
@@ -1048,6 +1056,7 @@
    */
   if (compiler_options.IsHugeMethod(GetNumDalvikInsns())) {
     skip_compilation = true;
+    *skip_message = "Huge method: " + std::to_string(GetNumDalvikInsns());
     // If we're got a huge number of basic blocks, don't bother with further analysis.
     if (static_cast<size_t>(num_blocks_) > (compiler_options.GetHugeMethodThreshold() / 2)) {
       return true;
@@ -1055,6 +1064,7 @@
   } else if (compiler_options.IsLargeMethod(GetNumDalvikInsns()) &&
     /* If it's large and contains no branches, it's likely to be machine generated initialization */
       (GetBranchCount() == 0)) {
+    *skip_message = "Large method with no branches";
     return true;
   } else if (compiler_filter == CompilerOptions::kSpeed) {
     // If not huge, compile.
@@ -1063,6 +1073,7 @@
 
   // Filter 2: Skip class initializers.
   if (((cu_->access_flags & kAccConstructor) != 0) && ((cu_->access_flags & kAccStatic) != 0)) {
+    *skip_message = "Class initializer";
     return true;
   }
 
@@ -1092,7 +1103,7 @@
     AnalyzeBlock(bb, &stats);
   }
 
-  return ComputeSkipCompilation(&stats, skip_compilation);
+  return ComputeSkipCompilation(&stats, skip_compilation, skip_message);
 }
 
 void MIRGraph::DoCacheFieldLoweringInfo() {
@@ -1285,7 +1296,7 @@
                                  method_lowering_infos_.GetRawStorage(), count);
 }
 
-bool MIRGraph::SkipCompilation(const std::string& methodname) {
+bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
   return cu_->compiler_driver->SkipCompilation(methodname);
 }
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 15c0aa4..0ff340e 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -559,12 +559,12 @@
    * Examine the graph to determine whether it's worthwile to spend the time compiling
    * this method.
    */
-  bool SkipCompilation();
+  bool SkipCompilation(std::string* skip_message);  // Skip reason is written to *skip_message.
 
   /*
    * Should we skip the compilation of this method based on its name?
    */
-  bool SkipCompilation(const std::string& methodname);
+  bool SkipCompilationByName(const std::string& methodname);
 
   /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
@@ -1127,7 +1127,8 @@
 
   void CountChecks(BasicBlock* bb);
   void AnalyzeBlock(BasicBlock* bb, struct MethodStats* stats);
-  bool ComputeSkipCompilation(struct MethodStats* stats, bool skip_default);
+  bool ComputeSkipCompilation(struct MethodStats* stats, bool skip_default,
+                              std::string* skip_message);
 
   CompilationUnit* const cu_;
   GrowableArray<int>* ssa_base_vregs_;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd..590c767 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -190,7 +190,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -261,7 +261,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -356,13 +356,13 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                            (static_cast<size_t>(frame_size_) <
-                            Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -381,7 +381,7 @@
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing.  This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
@@ -401,7 +401,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c1ce03d..3f32c51 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -219,7 +219,8 @@
   kA64First = 0,
   kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
   kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Add4RRre,      // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
   kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
@@ -328,7 +329,8 @@
   kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
   kA64Stlxr3wrX,     // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0].
   kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Sub4RRre,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index c5bd005..2a8da24 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -115,6 +115,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
                  "add", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRre), SF_VARIANTS(0x0b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "add", "!0r, !1r, !2r!3e", kFixupNone),
     // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
     //   fixups and contains information to identify the adr label.
     ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
@@ -558,6 +562,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
                  "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRre), SF_VARIANTS(0x4b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3e", kFixupNone),
     ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
                  kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748ef..1df576b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -95,8 +95,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
-  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -141,7 +140,6 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
   LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
@@ -150,8 +148,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
@@ -213,7 +210,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -261,7 +258,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
     // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -337,19 +334,19 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                              (static_cast<size_t>(frame_size_) <
-                              Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
-  const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+        Thread::kStackOverflowSignalReservedBytes;
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
@@ -382,7 +379,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
@@ -412,7 +409,7 @@
         // Branch to throw target if there is not enough room.
         OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
         OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
       } else {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index b1b83f0..f1270ec 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -87,7 +87,9 @@
                           OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                               RegStorage r_src, OpSize size) OVERRIDE;
-    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
+    LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
+                           int offset, int check_value, LIR* target) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg);
@@ -239,6 +241,8 @@
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
+    LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                           A64RegExtEncodings ext, uint8_t amount);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2c6b11d..2ac4adb 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -160,6 +160,19 @@
   return branch;
 }
 
+LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
+                                     RegStorage base_reg, int offset, int check_value,
+                                     LIR* target) {
+  // Loads the 32-bit value at [base_reg + offset] into a temp, compares it against
+  // check_value and returns the conditional branch (on cond) to target.
+  //
+  // The caller may pass a 64-bit temp (e.g. an ArgReg or RefReg), but the comparison is
+  // always done on 32 bits, so use the 32-bit view of the temp in that case.
+  const RegStorage temp32 = temp_reg.Is64Bit() ? As32BitReg(temp_reg) : temp_reg;
+  Load32Disp(base_reg, offset, temp32);
+  return OpCmpImmBranch(cond, temp32, check_value, target);
+}
+
 LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
   bool dest_is_fp = r_dest.IsFloat();
   bool src_is_fp = r_src.IsFloat();
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index fba368a..06e1cda 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -1163,7 +1163,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 2254b8b..672aa88 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -99,7 +99,8 @@
 
   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
   if (data_target == NULL) {
-    data_target = AddWordData(&literal_list_, value);
+    // Wide, as we need 8B alignment.
+    data_target = AddWideData(&literal_list_, value, 0);
   }
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
@@ -643,6 +644,44 @@
   }
 }
 
+LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
+  // Emits "r_dest = r_src1 <op> extend(r_src2, ext, amount)" using the extended-register
+  // forms of add/sub. Only kOpAdd and kOpSub are supported; anything else is fatal.
+  const bool is_wide = r_dest.Is64Bit();
+  // Placeholder opcode, only left in place if the op is unsupported (LOG(FATAL) below).
+  ArmOpcode opcode = kA64Brk1d;
+  if (op == kOpAdd) {
+    opcode = kA64Add4RRre;
+  } else if (op == kOpSub) {
+    opcode = kA64Sub4RRre;
+  } else {
+    LOG(FATAL) << "Unimplemented opcode: " << op;
+  }
+  // The width of the destination selects between the 32-bit and the 64-bit variant.
+  const ArmOpcode widened_opcode = is_wide ? WIDE(opcode) : opcode;
+
+  if (!is_wide) {
+    // 32-bit operation: both sources must be 32-bit as well.
+    CHECK(!r_src1.Is64Bit());
+    CHECK(!r_src2.Is64Bit());
+  } else {
+    CHECK(r_src1.Is64Bit());
+
+    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
+    // Note: this is not according to aarch64 specifications, but our encoding.
+    if (!r_src2.Is64Bit()) {
+      r_src2 = As64BitReg(r_src2);
+    }
+  }
+
+  // Sanity check: the amount must be in the range 0..4.
+  CHECK_LE(amount, 4);
+
+  const int extend_info = EncodeExtend(ext, amount);
+  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), extend_info);
+}
+
 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
@@ -660,6 +699,7 @@
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  int info = 0;
 
   switch (op) {
     case kOpLsl: {
@@ -692,7 +732,8 @@
         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
         log_imm = -1;
-        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
+        alt_opcode = (neg) ? kA64Add4RRre : kA64Sub4RRre;
+        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
       }
       break;
     // case kOpRsub:
@@ -734,8 +775,8 @@
   if (log_imm >= 0) {
     return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
-    RegStorage r_scratch = AllocTemp();
-    if (IS_WIDE(wide)) {
+    RegStorage r_scratch;
+    if (is_wide) {
       r_scratch = AllocTempWide();
       LoadConstantWide(r_scratch, value);
     } else {
@@ -743,7 +784,7 @@
       LoadConstant(r_scratch, value);
     }
     if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
     else
       res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
     FreeTemp(r_scratch);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 6397208..3f9379c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -292,10 +292,14 @@
   return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
 }
 
-bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) {
+bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
-  return it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0;
+  bool res = (it != inline_methods_.end() && (it->second.flags & kInlineIntrinsic) != 0);
+  if (res && intrinsic != nullptr) {  // The out-parameter is optional (may be nullptr).
+    *intrinsic = it->second;  // Hand the matching InlineMethod entry back to the caller.
+  }
+  return res;  // True iff method_index is registered with the kInlineIntrinsic flag set.
 }
 
 bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) {
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index c03f89c..70693c2 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -67,7 +67,7 @@
     /**
      * Check whether a particular method index corresponds to an intrinsic function.
      */
-    bool IsIntrinsic(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+    bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_);
 
     /**
      * Generate code for an intrinsic function invocation.
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3b99421..e36b592 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -173,7 +173,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
-  if (Runtime::Current()->ExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
@@ -188,7 +188,7 @@
 }
 
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -197,13 +197,13 @@
 }
 
 void Mir2Lir::MarkPossibleStackOverflowException() {
-  if (!Runtime::Current()->ExplicitStackOverflowChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
     MarkSafepointPC(last_lir_insn_);
   }
 }
 
 void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -2171,7 +2171,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       return;
     }
@@ -2191,7 +2191,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       OpUnconditionalBranch(target);
       return;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 641579f..638c590 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -363,20 +363,27 @@
 INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocation, RegLocation arg0,
             RegLocation arg1, bool safepoint_pc)
 
+// TODO: This is a hack! Reshape the two macros into functions and move them to a better place.
+#define IsSameReg(r1, r2) \
+  (GetRegInfo(r1)->Master()->GetReg().GetReg() == GetRegInfo(r2)->Master()->GetReg().GetReg())
+#define TargetArgReg(arg, is_wide) \
+  (GetRegInfo(TargetReg(arg))->FindMatchingView( \
+     (is_wide) ? RegisterInfo::k64SoloStorageMask : RegisterInfo::k32SoloStorageMask)->GetReg())
+
 void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
-  if (arg1.GetReg() == TargetReg(kArg0).GetReg()) {
-    if (arg0.GetReg() == TargetReg(kArg1).GetReg()) {
+  if (IsSameReg(arg1, TargetReg(kArg0))) {
+    if (IsSameReg(arg0, TargetReg(kArg1))) {
       // Swap kArg0 and kArg1 with kArg2 as temp.
-      OpRegCopy(TargetReg(kArg2), arg1);
-      OpRegCopy(TargetReg(kArg0), arg0);
-      OpRegCopy(TargetReg(kArg1), TargetReg(kArg2));
+      OpRegCopy(TargetArgReg(kArg2, arg1.Is64Bit()), arg1);  // Park arg1 in the kArg2 temp.
+      OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
+      OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), TargetArgReg(kArg2, arg1.Is64Bit()));
     } else {
-      OpRegCopy(TargetReg(kArg1), arg1);
-      OpRegCopy(TargetReg(kArg0), arg0);
+      OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1);  // Move arg1 out of kArg0 first.
+      OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
     }
   } else {
-    OpRegCopy(TargetReg(kArg0), arg0);
-    OpRegCopy(TargetReg(kArg1), arg1);
+    OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
+    OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1);
   }
 }
 
@@ -977,7 +984,7 @@
                            type, skip_this);
 
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
@@ -1204,7 +1211,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 6469d9c..d6f6ea1 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -214,7 +214,11 @@
   if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     def_start = last_lir_insn_;
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    if (rl_dest.ref) {
+      StoreRefDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    } else {
+      Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    }
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
     if (!rl_dest.ref) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c734202..e53105f 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 9155677..f70087d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
+#include "instruction_set.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -206,6 +207,36 @@
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
+// Size of a frame that we definitely consider large. Anything larger than this should
+// definitely get a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that should be small. Any leaf method with a frame smaller than this should
+// run without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when its size is at least kLargeFrameSize, or at least a
+// quarter of the stack overflow reserved bytes of the given instruction set.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+               kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+               kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+               kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+               kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+               kSmallFrameSize_is_not_a_small_frame_x86_64);
+
 class Mir2Lir : public Backend {
   public:
     /*
@@ -953,8 +984,8 @@
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     virtual bool GenInlinedAbsLong(CallInfo* info);
-    bool GenInlinedAbsFloat(CallInfo* info);
-    bool GenInlinedAbsDouble(CallInfo* info);
+    virtual bool GenInlinedAbsFloat(CallInfo* info);
+    virtual bool GenInlinedAbsDouble(CallInfo* info);
     bool GenInlinedFloatCvt(CallInfo* info);
     bool GenInlinedDoubleCvt(CallInfo* info);
     virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 3f54798..f06f08e 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -325,11 +325,21 @@
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
+#define EXT_0F_REX_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
 #define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
+#define EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
 #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -481,6 +491,10 @@
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movsx8,  0x00, 0xBE, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
+  EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movzx8q, 0xB6, REG_DEF0),
+  EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movzx16q, 0xB7, REG_DEF0),
+  EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movsx8q, 0xBE, REG_DEF0),
+  EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movsx16q, 0xBF, REG_DEF0),
 #undef EXT_0F_ENCODING_MAP
 
   { kX86Jcc8,  kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0,             0, 0x70, 0,    0, 0, 0, 0, false }, "Jcc8",  "!1c !0t" },
@@ -827,7 +841,8 @@
       CHECK(strchr(entry->name, '8') != nullptr) << entry->name;
     } else {
       if (entry->skeleton.immediate_bytes != 1) {  // Ignore ...I8 instructions.
-        if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")) {
+        if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")
+           && !StartsWith(entry->name, "Movzx8q") && !StartsWith(entry->name, "Movsx8q")) {
           CHECK(strchr(entry->name, '8') == nullptr) << entry->name;
         }
       }
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab2..28195ab 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d874aaa..d482e58 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -155,6 +155,8 @@
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
   bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
   bool GenInlinedSqrt(CallInfo* info);
+  bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+  bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
   bool GenInlinedPeek(CallInfo* info, OpSize size);
   bool GenInlinedPoke(CallInfo* info, OpSize size);
   void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
@@ -796,6 +798,14 @@
    */
   void AnalyzeDoubleUse(RegLocation rl_use);
 
+  /*
+   * @brief Analyze one invoke-static MIR instruction
+   * @param opcode MIR instruction opcode.
+   * @param bb Basic block containing instruction.
+   * @param mir Instruction to analyze.
+   */
+  void AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir);
+
   bool Gen64Bit() const  { return gen64bit_; }
 
   // Information derived from analysis of MIR
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 458f9c6..5082d60 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -221,7 +221,7 @@
       LoadConstant(rl_result.reg, 0x7fffffff);
       NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
       NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
       LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
       NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
       LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
@@ -242,7 +242,7 @@
       LoadConstant(rl_result.reg, 0x7fffffff);
       NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
       NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
       LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
       NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
       LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
@@ -281,7 +281,7 @@
         LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
         NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
         NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
         LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
         NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
         LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
@@ -306,7 +306,7 @@
         LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
         NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
         NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
-        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
         LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
         NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
         LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
@@ -568,8 +568,11 @@
   rl_src = LoadValueWide(rl_src, kCoreReg);
   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   if (Gen64Bit()) {
-    LoadConstantWide(rl_result.reg, 0x8000000000000000);
-    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
+    OpRegCopy(rl_result.reg, rl_src.reg);
+    // Flip sign bit.
+    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
+    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
+    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
   } else {
     OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
     OpRegCopy(rl_result.reg, rl_src.reg);
@@ -587,5 +590,107 @@
   return true;
 }
 
+bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {  // result = args[0] & 0x7fffffff
+  // The value to operate on: the first argument of the call.
+  RegLocation rl_src = info->args[0];
+
+  // Location that receives the result of the inlined call.
+  RegLocation rl_dest = InlineTarget(info);
+
+  // The source s_reg_low must be valid; an invalid destination means the result is unused.
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  if (rl_dest.s_reg_low == INVALID_SREG) {
+    // Result is unused, the code is dead. Inlining successful, no code generated.
+    return true;
+  }
+  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+
+  // Source and destination map to the same virtual register: update the value in place.
+  if (v_src_reg == v_dst_reg) {
+    rl_src = UpdateLoc(rl_src);
+
+    // Value is live in a physical register: mask it there and store it back.
+    if (rl_src.location == kLocPhysReg) {
+      rl_src = LoadValue(rl_src, kCoreReg);
+      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
+      StoreValue(rl_dest, rl_src);
+      return true;
+    }
+    // Otherwise the value is expected to be in the Dalvik frame or a compiler temp.
+    DCHECK((rl_src.location == kLocDalvikFrame) ||
+         (rl_src.location == kLocCompilerTemp));
+
+    // AND the mask into memory at [kSp + displacement] without loading the value first.
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement, 0x7fffffff);
+    AnnotateDalvikRegAccess(lir, displacement >> 2, false /*is_load */, false /* is_64bit */);
+    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit*/);
+    return true;
+  } else {  // Different virtual registers: AND the source into a separate result register.
+    rl_src = LoadValue(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
+    StoreValue(rl_dest, rl_result);
+    return true;
+  }
+}
+
+bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {  // Masks off the top bit of wide args[0].
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_dest = InlineTargetWide(info);
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  if (rl_dest.s_reg_low == INVALID_SREG) {
+    // Result is unused, the code is dead. Inlining successful, no code generated.
+    return true;
+  }
+  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+  rl_src = UpdateLocWide(rl_src);
+
+  // Case 1: the argument is live in a floating-point register; apply the mask with PAND.
+  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
+    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+    if (rl_result.reg != rl_src.reg) {  // Load the mask into the result, then AND in the source.
+      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+    } else {  // Result aliases the source: AND it with the mask held in a temporary.
+      RegStorage sign_mask = AllocTempDouble();
+      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
+      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
+      FreeTemp(sign_mask);
+    }
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  } else if (v_src_reg == v_dst_reg) {
+    // Case 2: source and destination are the same virtual register; update the value in place.
+    // Value is in a physical (non-float) register: AND 0x7fffffff into reg.GetHigh().
+    if (rl_src.location == kLocPhysReg) {
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
+      StoreValueWide(rl_dest, rl_src);
+      return true;
+    }
+    // Otherwise the value is expected to be in the Dalvik frame or a compiler temp.
+    DCHECK((rl_src.location == kLocDalvikFrame) ||
+           (rl_src.location == kLocCompilerTemp));
+
+    // AND the mask into the 32-bit word at [kSp + displacement + HIWORD_OFFSET] in memory.
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement  + HIWORD_OFFSET, 0x7fffffff);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit*/);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /*is_load */, true /* is_64bit */);
+    return true;
+  } else {  // Case 3: copy the source into a core result, then mask the result's GetHigh().
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegCopyWide(rl_result.reg, rl_src.reg);
+    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  }
+}
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index b342813..2f914c1 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -35,14 +35,13 @@
     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
     RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegReg(kOpXor, rl_result.reg, rl_result.reg);  // result = 0
-    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe);  // result = (src1 != src2) ? 1 : result
     RegStorage temp_reg = AllocTemp();
-    OpRegReg(kOpNeg, temp_reg, rl_result.reg);
     OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-    // result = (src1 < src2) ? -result : result
-    OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg);
+    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
+    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
+    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
+    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+
     StoreValue(rl_dest, rl_result);
     FreeTemp(temp_reg);
     return;
@@ -323,12 +322,22 @@
     return;
   }
 
+  if (Gen64Bit()) {
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+
+    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+    OpCondBranch(ccode, taken);
+    return;
+  }
+
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
   RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
   LoadValueDirectWideFixed(rl_src1, r_tmp1);
   LoadValueDirectWideFixed(rl_src2, r_tmp2);
+
   // Swap operands and condition code to prevent use of zero flag.
   if (ccode == kCondLe || ccode == kCondGt) {
     // Compute (r3:r2) = (r3:r2) - (r1:r0)
@@ -366,6 +375,23 @@
   LIR* taken = &block_label_list_[bb->taken];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
+
+  if (Gen64Bit()) {
+    if (is_equality_test && val == 0) {
+      // An ==/!= comparison against 0 simplifies to testing the register against itself.
+      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
+    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
+      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
+    } else {
+      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+      LoadConstantWide(tmp, val);
+      OpRegReg(kOpCmp, rl_src1.reg, tmp);
+      FreeTemp(tmp);
+    }
+    OpCondBranch(ccode, taken);
+    return;
+  }
+
   if (is_equality_test && val != 0) {
     rl_src1 = ForceTempWide(rl_src1);
   }
@@ -373,7 +399,7 @@
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
   if (is_equality_test) {
-    // We can simpolify of comparing for ==, != to 0.
+    // Comparing for ==, != against 0 can be simplified.
     if (val == 0) {
       if (IsTemp(low_reg)) {
         OpRegReg(kOpOr, low_reg, high_reg);
@@ -1582,8 +1608,8 @@
   LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   // RHS is -1.
-  LoadConstantWide(rs_r3q, 0x8000000000000000);
-  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg());
+  LoadConstantWide(rs_r6q, 0x8000000000000000);
+  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
   LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   // In 0x8000000000000000/-1 case.
@@ -2174,6 +2200,7 @@
     if (rl_dest.location == kLocPhysReg &&
         rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
+      OpRegCopy(rl_dest.reg, rl_src1.reg);
       NewLIR2(x86op, rl_dest.reg.GetReg(), val);
       StoreFinalValueWide(rl_dest, rl_dest);
       return true;
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 483d8cf..078dd5a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -441,6 +441,31 @@
   Clobber(rs_rCX);
   Clobber(rs_rDX);
   Clobber(rs_rBX);
+
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
+
+  if (Gen64Bit()) {
+    Clobber(rs_r8);
+    Clobber(rs_r9);
+    Clobber(rs_r10);
+    Clobber(rs_r11);
+
+    Clobber(rs_fr8);
+    Clobber(rs_fr9);
+    Clobber(rs_fr10);
+    Clobber(rs_fr11);
+    Clobber(rs_fr12);
+    Clobber(rs_fr13);
+    Clobber(rs_fr14);
+    Clobber(rs_fr15);
+  }
 }
 
 RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -2177,7 +2202,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index b93e3e8..ac5162e 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -18,6 +18,8 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/dataflow_iterator-inl.h"
 #include "x86_lir.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 
 namespace art {
 
@@ -609,8 +611,12 @@
         if (val_lo < 0) {
           val_hi += 1;
         }
-        res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi);
-        NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32);
+        if (val_hi != 0) {
+          res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi);
+          NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32);
+        } else {
+          res = NewLIR2(kX86Xor64RR, r_dest.GetReg(), r_dest.GetReg());
+        }
         if (val_lo != 0) {
           NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo);
         }
@@ -953,6 +959,9 @@
     case Instruction::PACKED_SWITCH:
       store_method_addr_ = true;
       break;
+    case Instruction::INVOKE_STATIC:
+      AnalyzeInvokeStatic(opcode, bb, mir);
+      break;
     default:
       // Other instructions are not interesting yet.
       break;
@@ -1020,4 +1029,22 @@
   DCHECK(CheckCorePoolSanity());
   return loc;
 }
+
+void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir) {
+  uint32_t index = mir->dalvikInsn.vB;  // Passed to IsIntrinsic() below; opcode/bb are unused.
+  if (!(mir->optimization_flags & MIR_INLINED)) {  // Skip invokes flagged MIR_INLINED.
+    DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
+    InlineMethod method;  // Read below only when IsIntrinsic() returns true.
+    if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+        ->IsIntrinsic(index, &method)) {
+      switch (method.opcode) {
+        case kIntrinsicAbsDouble:
+          store_method_addr_ = true;  // Presumably needed for the abs mask constant - TODO confirm.
+          break;
+        default:  // No other intrinsic changes the analysis result here.
+          break;
+      }
+    }
+  }
+}
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 28b9dca..17c44bc 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -609,6 +609,10 @@
   Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
   Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value
   Binary0fOpCode(kX86Movsx16),  // sign-extend 16-bit value
+  Binary0fOpCode(kX86Movzx8q),   // zero-extend 8-bit value to quad word
+  Binary0fOpCode(kX86Movzx16q),  // zero-extend 16-bit value to quad word
+  Binary0fOpCode(kX86Movsx8q),   // sign-extend 8-bit value to quad word
+  Binary0fOpCode(kX86Movsx16q),  // sign-extend 16-bit value to quad word
 #undef Binary0fOpCode
   kX86Jcc8, kX86Jcc32,  // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
   kX86Jmp8, kX86Jmp32,  // jmp rel8/32; lir operands - 0: rel, target assigned
@@ -707,6 +711,8 @@
 #define REX_X 0x42
 // Extension of the ModR/M r/m field, SIB base field, or Opcode reg field
 #define REX_B 0x41
+// Extended register set
+#define REX 0x40
 // Mask extracting the least 3 bits of r0..r15
 #define kRegNumMask32 0x07
 // Value indicating that base or reg is not used
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 3e326f0..4a331fc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2054,7 +2054,9 @@
   ProfileFile::ProfileData data;
   if (!profile_file_.GetProfileData(&data, method_name)) {
     // Not in profile, no information can be determined.
-    VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    if (kIsDebugBuild) {
+      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    }
     return true;
   }
 
@@ -2063,13 +2065,16 @@
   // falls inside a bucket.
   bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
                  <= compiler_options_->GetTopKProfileThreshold();
-  if (compile) {
-    LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-        << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-  } else {
-    VLOG(compiler) << "not compiling method " << method_name << " because it's not part of leading "
-        << compiler_options_->GetTopKProfileThreshold() << "% samples)";
+  if (kIsDebugBuild) {
+    if (compile) {
+      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
+          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
+          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
+    } else {
+      VLOG(compiler) << "not compiling method " << method_name
+          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
+          << "% samples)";
+    }
   }
   return !compile;
 }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5d1c5da..fb3341b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -53,7 +53,10 @@
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
     generate_gdb_information_(false),
     top_k_profile_threshold_(kDefaultTopKProfileThreshold),
-    include_debug_symbols_(kDefaultIncludeDebugSymbols)
+    include_debug_symbols_(kDefaultIncludeDebugSymbols),
+    explicit_null_checks_(true),
+    explicit_so_checks_(true),
+    explicit_suspend_checks_(true)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -67,7 +70,10 @@
                   size_t num_dex_methods_threshold,
                   bool generate_gdb_information,
                   double top_k_profile_threshold,
-                  bool include_debug_symbols
+                  bool include_debug_symbols,
+                  bool explicit_null_checks,
+                  bool explicit_so_checks,
+                  bool explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -80,7 +86,10 @@
     num_dex_methods_threshold_(num_dex_methods_threshold),
     generate_gdb_information_(generate_gdb_information),
     top_k_profile_threshold_(top_k_profile_threshold),
-    include_debug_symbols_(include_debug_symbols)
+    include_debug_symbols_(include_debug_symbols),
+    explicit_null_checks_(explicit_null_checks),
+    explicit_so_checks_(explicit_so_checks),
+    explicit_suspend_checks_(explicit_suspend_checks)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -147,6 +156,30 @@
     return include_debug_symbols_;
   }
 
+  bool GetExplicitNullChecks() const {  // Current value of explicit_null_checks_.
+    return explicit_null_checks_;
+  }
+
+  void SetExplicitNullChecks(bool new_val) {  // Overwrites explicit_null_checks_.
+    explicit_null_checks_ = new_val;
+  }
+
+  bool GetExplicitStackOverflowChecks() const {  // Current value of explicit_so_checks_.
+    return explicit_so_checks_;
+  }
+
+  void SetExplicitStackOverflowChecks(bool new_val) {  // Overwrites explicit_so_checks_.
+    explicit_so_checks_ = new_val;
+  }
+
+  bool GetExplicitSuspendChecks() const {  // Current value of explicit_suspend_checks_.
+    return explicit_suspend_checks_;
+  }
+
+  void SetExplicitSuspendChecks(bool new_val) {  // Overwrites explicit_suspend_checks_.
+    explicit_suspend_checks_ = new_val;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -166,6 +199,9 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool include_debug_symbols_;
+  bool explicit_null_checks_;
+  bool explicit_so_checks_;
+  bool explicit_suspend_checks_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c6b9161..4590880 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -800,6 +800,7 @@
 size_t OatWriter::InitOatCode(size_t offset) {
   // calculate the offsets within OatHeader to executable code
   size_t old_offset = offset;
+  size_t adjusted_offset = offset;
   // required to be on a new page boundary
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
@@ -809,7 +810,8 @@
 
     #define DO_TRAMPOLINE(field, fn_name) \
       offset = CompiledCode::AlignCode(offset, instruction_set); \
-      oat_header_->Set ## fn_name ## Offset(offset); \
+      adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
+      oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
       field.reset(compiler_driver_->Create ## fn_name()); \
       offset += field->size();
 
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index ac84d6a..d5225c1 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,11 +30,7 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  // NOTE: the assembler used here is ARM, not Thumb.  This is because the address
-  // returned by this function is a pointer and for thumb we would have to set the
-  // bottom bit.  It doesn't matter since the instructions generated are the same
-  // size anyway.
-  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
+  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c3f2082..38051ea 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -744,6 +744,20 @@
   *parsed_value = value;
 }
 
+void CheckExplicitCheckOptions(InstructionSet isa, bool* explicit_null_checks,
+                               bool* explicit_so_checks, bool* explicit_suspend_checks) {
+  switch (isa) {  // Only kArm and kThumb2 keep the caller's settings; all others are overridden.
+    case kArm:
+    case kThumb2:
+      break;  // All checks implemented, leave the three flags as the caller set them.
+
+    default:  // No checks implemented: force all three flags to true (explicit checks).
+      *explicit_null_checks = true;
+      *explicit_so_checks = true;
+      *explicit_suspend_checks = true;
+  }
+}
+
 static int dex2oat(int argc, char** argv) {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
@@ -825,6 +839,11 @@
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
 
+  bool explicit_null_checks = true;
+  bool explicit_so_checks = true;
+  bool explicit_suspend_checks = true;
+  bool has_explicit_checks_options = false;
+
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
     const bool log_options = false;
@@ -998,6 +1017,31 @@
     } else if (option.starts_with("--dump-cfg-passes=")) {
       std::string dump_passes = option.substr(strlen("--dump-cfg-passes=")).data();
       PassDriverMEOpts::SetDumpPassList(dump_passes);
+    } else if (option.starts_with("--implicit-checks=")) {
+      std::string checks = option.substr(strlen("--implicit-checks=")).data();
+      std::vector<std::string> checkvec;
+      Split(checks, ',', checkvec);
+      for (auto& str : checkvec) {
+        std::string val = Trim(str);
+        if (val == "none") {
+          explicit_null_checks = true;
+          explicit_so_checks = true;
+          explicit_suspend_checks = true;
+        } else if (val == "null") {
+          explicit_null_checks = false;
+        } else if (val == "suspend") {
+          explicit_suspend_checks = false;
+        } else if (val == "stack") {
+          explicit_so_checks = false;
+        } else if (val == "all") {
+          explicit_null_checks = false;
+          explicit_so_checks = false;
+          explicit_suspend_checks = false;
+        } else {
+          Usage("--implicit-checks passed non-recognized value %s", val.c_str());
+        }
+      }
+      has_explicit_checks_options = true;
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1033,7 +1077,7 @@
 
   bool image = (!image_filename.empty());
   if (!image && boot_image_filename.empty()) {
-    boot_image_filename += GetAndroidRoot();
+    boot_image_filename += android_root;
     boot_image_filename += "/framework/boot.art";
   }
   std::string boot_image_option;
@@ -1093,10 +1137,8 @@
   }
 
   if (compiler_filter_string == nullptr) {
-    if ((instruction_set == kX86_64 && image) ||
-        instruction_set == kArm64 ||
-        instruction_set == kMips) {
-      // TODO: implement/fix compilers for these architectures.
+    if (instruction_set == kMips) {
+      // TODO: fix compiler for Mips.
       compiler_filter_string = "interpret-only";
     } else if (image) {
       compiler_filter_string = "speed";
@@ -1126,6 +1168,10 @@
     Usage("Unknown --compiler-filter value %s", compiler_filter_string);
   }
 
+  CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
+                            &explicit_suspend_checks);
+
+  LOG(INFO) << "init compiler options for explicit null: " << explicit_null_checks;
   CompilerOptions compiler_options(compiler_filter,
                                    huge_method_threshold,
                                    large_method_threshold,
@@ -1134,7 +1180,10 @@
                                    num_dex_methods_threshold,
                                    generate_gdb_information,
                                    top_k_profile_threshold,
-                                   include_debug_symbols
+                                   include_debug_symbols,
+                                   explicit_null_checks,
+                                   explicit_so_checks,
+                                   explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
@@ -1205,6 +1254,28 @@
     return EXIT_FAILURE;
   }
   std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
+
+  // TODO: Not sure whether it's a good idea to allow anything else but the runtime option in
+  // this case at all, as we'll have to throw away produced code for a mismatch.
+  if (!has_explicit_checks_options) {
+    bool cross_compiling = true;
+    switch (kRuntimeISA) {
+      case kArm:
+      case kThumb2:
+        cross_compiling = instruction_set != kArm && instruction_set != kThumb2;
+        break;
+      default:
+        cross_compiling = instruction_set != kRuntimeISA;
+        break;
+    }
+    if (!cross_compiling) {
+      Runtime* runtime = Runtime::Current();
+      compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks());
+      compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks());
+      compiler_options.SetExplicitSuspendChecks(runtime->ExplicitSuspendChecks());
+    }
+  }
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index e6a6860..b012bc1 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -56,10 +56,16 @@
   "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 };
 
+// Bit masks for the W, R, X and B flags of an x86-64 REX prefix.
+constexpr uint8_t REX_W = 0b1000;
+constexpr uint8_t REX_R = 0b0100;
+constexpr uint8_t REX_X = 0b0010;
+constexpr uint8_t REX_B = 0b0001;
+
 static void DumpReg0(std::ostream& os, uint8_t rex, size_t reg,
                      bool byte_operand, uint8_t size_override) {
   DCHECK_LT(reg, (rex == 0) ? 8u : 16u);
-  bool rex_w = (rex & 0b1000) != 0;
+  bool rex_w = (rex & REX_W) != 0;
   if (byte_operand) {
     os << ((rex == 0) ? gReg8Names[reg] : gExtReg8Names[reg]);
   } else if (rex_w) {
@@ -86,14 +92,14 @@
 
 static void DumpReg(std::ostream& os, uint8_t rex, uint8_t reg,
                     bool byte_operand, uint8_t size_override, RegFile reg_file) {
-  bool rex_r = (rex & 0b0100) != 0;
+  bool rex_r = (rex & REX_R) != 0;
   size_t reg_num = rex_r ? (reg + 8) : reg;
   DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file);
 }
 
 static void DumpRmReg(std::ostream& os, uint8_t rex, uint8_t reg,
                       bool byte_operand, uint8_t size_override, RegFile reg_file) {
-  bool rex_b = (rex & 0b0001) != 0;
+  bool rex_b = (rex & REX_B) != 0;
   size_t reg_num = rex_b ? (reg + 8) : reg;
   DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file);
 }
@@ -107,19 +113,19 @@
 }
 
 static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg) {
-  bool rex_b = (rex & 0b0001) != 0;
+  bool rex_b = (rex & REX_B) != 0;
   size_t reg_num = rex_b ? (reg + 8) : reg;
   DumpAddrReg(os, rex, reg_num);
 }
 
 static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg) {
-  bool rex_x = (rex & 0b0010) != 0;
+  bool rex_x = (rex & REX_X) != 0;
   uint8_t reg_num = rex_x ? (reg + 8) : reg;
   DumpAddrReg(os, rex, reg_num);
 }
 
 static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg) {
-  bool rex_b = (rex & 0b0001) != 0;
+  bool rex_b = (rex & REX_B) != 0;
   size_t reg_num = rex_b ? (reg + 8) : reg;
   DumpReg0(os, rex, reg_num, false, 0);
 }
@@ -896,6 +902,7 @@
   case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
     opcode << "mov";
     immediate_bytes = 1;
+    byte_operand = true;
     reg_in_opcode = true;
     break;
   case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF:
@@ -916,6 +923,15 @@
     byte_operand = (*instr == 0xC0);
     break;
   case 0xC3: opcode << "ret"; break;
+  case 0xC6:
+    static const char* c6_opcodes[] = {"mov", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6"};
+    modrm_opcodes = c6_opcodes;
+    store = true;
+    immediate_bytes = 1;
+    has_modrm = true;
+    reg_is_opcode = true;
+    byte_operand = true;
+    break;
   case 0xC7:
     static const char* c7_opcodes[] = {"mov", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7"};
     modrm_opcodes = c7_opcodes;
@@ -1064,6 +1080,16 @@
     if (reg_is_opcode && modrm_opcodes != NULL) {
       opcode << modrm_opcodes[reg_or_opcode];
     }
+
+    // Add opcode suffixes to indicate size.
+    if (byte_operand) {
+      opcode << 'b';
+    } else if ((rex & REX_W) != 0) {
+      opcode << 'q';
+    } else if (prefix[2] == 0x66) {
+      opcode << 'w';
+    }
+
     if (load) {
       if (!reg_is_opcode) {
         DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2], dst_reg_file);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c40ae7a..992202a 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -54,6 +54,7 @@
 	gc/collector/concurrent_copying.cc \
 	gc/collector/garbage_collector.cc \
 	gc/collector/immune_region.cc \
+	gc/collector/mark_compact.cc \
 	gc/collector/mark_sweep.cc \
 	gc/collector/partial_mark_sweep.cc \
 	gc/collector/semi_space.cc \
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9..2a82129 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
@@ -59,7 +60,7 @@
   // get the method from the top of the stack.  However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -190,7 +191,7 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
-  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 4ede453..2e60b93 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1615,14 +1615,14 @@
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
     // easier to have an extra small stack area.
 
-    str x19, [sp, #-16]!      // Save integer result.
+    str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
     str d0,  [sp, #8]         // Save floating-point result.
 
-    mov   x0, xSELF           // Pass Thread.
     add   x1, sp, #16         // Pass SP.
     mov   x2, x0              // Pass integer result.
     fmov  x3, d0              // Pass floating-point result.
+    mov   x0, xSELF           // Pass Thread.
     bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
 
     mov   x9, x0              // Return address from instrumentation call.
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index 63f2cf8..e766a8d 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -31,7 +31,7 @@
   if (kNeedSwapMutexes) {
     gSwapMutexes = new std::vector<Mutex*>;
     for (size_t i = 0; i < kSwapMutexCount; ++i) {
-      gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe"));
+      gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe", kSwapMutexesLock));
     }
   }
 }
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 1ba6180..81e62ab 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -55,6 +55,7 @@
 enum LockLevel {
   kLoggingLock = 0,
   kMemMapsLock,
+  kSwapMutexesLock,
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 6d5b59c..f29a7ec 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -122,7 +122,9 @@
   char* ptr = static_cast<char*>(buffer);
   while (byte_count > 0) {
     ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count));
-    if (bytes_read == -1) {
+    if (bytes_read <= 0) {
+      // 0: end of file
+      // -1: error
       return false;
     }
     byte_count -= bytes_read;  // Reduce the number of remaining bytes.
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index d620666..33b3d3e 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -16,6 +16,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/unix_file/random_access_file_test.h"
+#include "common_runtime_test.h"  // For ScratchFile
 #include "gtest/gtest.h"
 
 namespace unix_file {
@@ -60,4 +61,15 @@
   EXPECT_TRUE(file.IsOpened());
 }
 
+TEST_F(FdFileTest, ReadFullyEmptyFile) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file;
+  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDONLY));
+  EXPECT_GE(file.Fd(), 0);
+  EXPECT_TRUE(file.IsOpened());
+  uint8_t buffer[16];
+  EXPECT_FALSE(file.ReadFully(&buffer, 4));
+}
+
 }  // namespace unix_file
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index f745088..16e0ec3 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_CLASS_LINKER_INL_H_
 
 #include "class_linker.h"
+#include "gc/heap-inl.h"
 #include "mirror/art_field.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
@@ -186,13 +187,19 @@
 
 inline mirror::IfTable* ClassLinker::AllocIfTable(Thread* self, size_t ifcount) {
   return down_cast<mirror::IfTable*>(
-      mirror::IfTable::Alloc(self, GetClassRoot(kObjectArrayClass), ifcount * mirror::IfTable::kMax));
+      mirror::IfTable::Alloc(self, GetClassRoot(kObjectArrayClass),
+                             ifcount * mirror::IfTable::kMax));
 }
 
 inline mirror::ObjectArray<mirror::ArtField>* ClassLinker::AllocArtFieldArray(Thread* self,
                                                                               size_t length) {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  // Can't have movable field arrays for mark compact since we need these arrays to always be valid
+  // so that we can do Object::VisitReferences in the case where the fields don't fit in the
+  // reference offsets word.
   return mirror::ObjectArray<mirror::ArtField>::Alloc(
-      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length);
+      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length,
+      kMoveFieldArrays ? heap->GetCurrentAllocator() : heap->GetCurrentNonMovingAllocator());
 }
 
 inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root)
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d684a50..d68aca9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1061,6 +1061,42 @@
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
 }
 
+void ClassLinker::VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  if ((flags & kVisitRootFlagAllRoots) != 0) {
+    for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+      callback(reinterpret_cast<mirror::Object**>(&it.second), arg, 0, kRootStickyClass);
+    }
+  } else if ((flags & kVisitRootFlagNewRoots) != 0) {
+    for (auto& pair : new_class_roots_) {
+      mirror::Object* old_ref = pair.second;
+      callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootStickyClass);
+      if (UNLIKELY(pair.second != old_ref)) {
+        // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
+        // corresponding object. This is slow, but luckily for us, this may only happen with a
+        // concurrent moving GC.
+        for (auto it = class_table_.lower_bound(pair.first), end = class_table_.end();
+            it != end && it->first == pair.first; ++it) {
+          // If the class stored matches the old class, update it to the new value.
+          if (old_ref == it->second) {
+            it->second = pair.second;
+          }
+        }
+      }
+    }
+  }
+  if ((flags & kVisitRootFlagClearRootLog) != 0) {
+    new_class_roots_.clear();
+  }
+  if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
+    log_new_class_table_roots_ = true;
+  } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
+    log_new_class_table_roots_ = false;
+  }
+  // We deliberately ignore the class roots in the image since we
+  // handle image roots by using the MS/CMS rescanning of dirty cards.
+}
+
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
@@ -1087,41 +1123,7 @@
       log_new_dex_caches_roots_ = false;
     }
   }
-  {
-    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    if ((flags & kVisitRootFlagAllRoots) != 0) {
-      for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
-        callback(reinterpret_cast<mirror::Object**>(&it.second), arg, 0, kRootStickyClass);
-      }
-    } else if ((flags & kVisitRootFlagNewRoots) != 0) {
-      for (auto& pair : new_class_roots_) {
-        mirror::Object* old_ref = pair.second;
-        callback(reinterpret_cast<mirror::Object**>(&pair.second), arg, 0, kRootStickyClass);
-        if (UNLIKELY(pair.second != old_ref)) {
-          // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
-          // corresponding object. This is slow, but luckily for us, this may only happen with a
-          // concurrent moving GC.
-          for (auto it = class_table_.lower_bound(pair.first), end = class_table_.end();
-              it != end && it->first == pair.first; ++it) {
-            // If the class stored matches the old class, update it to the new value.
-            if (old_ref == it->second) {
-              it->second = pair.second;
-            }
-          }
-        }
-      }
-    }
-    if ((flags & kVisitRootFlagClearRootLog) != 0) {
-      new_class_roots_.clear();
-    }
-    if ((flags & kVisitRootFlagStartLoggingNewRoots) != 0) {
-      log_new_class_table_roots_ = true;
-    } else if ((flags & kVisitRootFlagStopLoggingNewRoots) != 0) {
-      log_new_class_table_roots_ = false;
-    }
-    // We deliberately ignore the class roots in the image since we
-    // handle image roots by using the MS/CMS rescanning of dirty cards.
-  }
+  VisitClassRoots(callback, arg, flags);
   callback(reinterpret_cast<mirror::Object**>(&array_iftable_), arg, 0, kRootVMInternal);
   DCHECK(array_iftable_ != nullptr);
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
@@ -1252,7 +1254,7 @@
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   InitializeClassVisitor visitor(class_size);
-  mirror::Object* k = (kMovingClasses) ?
+  mirror::Object* k = kMovingClasses ?
       heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
       heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
   if (UNLIKELY(k == nullptr)) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 6d96aa2..62b5ea8 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -247,8 +247,10 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void VisitClassRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+      LOCKS_EXCLUDED(Locks::classlinker_classes_lock_);
   void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
-      LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
+      LOCKS_EXCLUDED(dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
       LOCKS_EXCLUDED(dex_lock_)
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 3ff55ab..10f34d9 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -935,8 +935,7 @@
   field_.field_idx_delta_ = DecodeUnsignedLeb128(&ptr_pos_);
   field_.access_flags_ = DecodeUnsignedLeb128(&ptr_pos_);
   if (last_idx_ != 0 && field_.field_idx_delta_ == 0) {
-    LOG(WARNING) << "Duplicate field " << PrettyField(GetMemberIndex(), dex_file_)
-                 << " in " << dex_file_.GetLocation();
+    LOG(WARNING) << "Duplicate field in " << dex_file_.GetLocation();
   }
 }
 
@@ -945,8 +944,7 @@
   method_.access_flags_ = DecodeUnsignedLeb128(&ptr_pos_);
   method_.code_off_ = DecodeUnsignedLeb128(&ptr_pos_);
   if (last_idx_ != 0 && method_.method_idx_delta_ == 0) {
-    LOG(WARNING) << "Duplicate method " << PrettyMethod(GetMemberIndex(), dex_file_)
-                 << " in " << dex_file_.GetLocation();
+    LOG(WARNING) << "Duplicate method in " << dex_file_.GetLocation();
   }
 }
 
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index edba502..b6810b0 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -145,28 +145,30 @@
   };
 
   enum VerifyFlag {
-    kVerifyNone            = 0x000000,
-    kVerifyRegA            = 0x000001,
-    kVerifyRegAWide        = 0x000002,
-    kVerifyRegB            = 0x000004,
-    kVerifyRegBField       = 0x000008,
-    kVerifyRegBMethod      = 0x000010,
-    kVerifyRegBNewInstance = 0x000020,
-    kVerifyRegBString      = 0x000040,
-    kVerifyRegBType        = 0x000080,
-    kVerifyRegBWide        = 0x000100,
-    kVerifyRegC            = 0x000200,
-    kVerifyRegCField       = 0x000400,
-    kVerifyRegCNewArray    = 0x000800,
-    kVerifyRegCType        = 0x001000,
-    kVerifyRegCWide        = 0x002000,
-    kVerifyArrayData       = 0x004000,
-    kVerifyBranchTarget    = 0x008000,
-    kVerifySwitchTargets   = 0x010000,
-    kVerifyVarArg          = 0x020000,
-    kVerifyVarArgRange     = 0x040000,
-    kVerifyRuntimeOnly     = 0x080000,
-    kVerifyError           = 0x100000,
+    kVerifyNone               = 0x000000,
+    kVerifyRegA               = 0x000001,
+    kVerifyRegAWide           = 0x000002,
+    kVerifyRegB               = 0x000004,
+    kVerifyRegBField          = 0x000008,
+    kVerifyRegBMethod         = 0x000010,
+    kVerifyRegBNewInstance    = 0x000020,
+    kVerifyRegBString         = 0x000040,
+    kVerifyRegBType           = 0x000080,
+    kVerifyRegBWide           = 0x000100,
+    kVerifyRegC               = 0x000200,
+    kVerifyRegCField          = 0x000400,
+    kVerifyRegCNewArray       = 0x000800,
+    kVerifyRegCType           = 0x001000,
+    kVerifyRegCWide           = 0x002000,
+    kVerifyArrayData          = 0x004000,
+    kVerifyBranchTarget       = 0x008000,
+    kVerifySwitchTargets      = 0x010000,
+    kVerifyVarArg             = 0x020000,
+    kVerifyVarArgNonZero      = 0x040000,
+    kVerifyVarArgRange        = 0x080000,
+    kVerifyVarArgRangeNonZero = 0x100000,
+    kVerifyRuntimeOnly        = 0x200000,
+    kVerifyError              = 0x400000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
@@ -506,7 +508,8 @@
 
   int GetVerifyExtraFlags() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
-        kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError));
+        kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero | kVerifyVarArgRange |
+        kVerifyVarArgRangeNonZero | kVerifyError));
   }
 
   bool GetVerifyIsRuntimeOnly() const {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 4cda58b..103b0d7 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -128,17 +128,17 @@
   V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
   V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
   V(0x73, RETURN_VOID_BARRIER, "return-void-barrier", k10x, false, kNone, kReturn, kVerifyNone) \
-  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
   V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
   V(0x79, UNUSED_79, "unused-79", k10x, false, kUnknown, 0, kVerifyError) \
   V(0x7A, UNUSED_7A, "unused-7a", k10x, false, kUnknown, 0, kVerifyError) \
   V(0x7B, NEG_INT, "neg-int", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
@@ -251,8 +251,8 @@
   V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg | kVerifyRuntimeOnly) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
   V(0xEB, UNUSED_EB, "unused-eb", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xEC, UNUSED_EC, "unused-ec", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xED, UNUSED_ED, "unused-ed", k10x, false, kUnknown, 0, kVerifyError) \
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 3d8b29f..ff836a4 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -511,13 +511,8 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* resolved_field =
       referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
-  if (UNLIKELY(resolved_field == NULL)) {
-    return NULL;
-  }
-  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-  // Check class is initiliazed or initializing.
-  if (UNLIKELY(!fields_class->IsInitializing())) {
-    return NULL;
+  if (UNLIKELY(resolved_field == nullptr)) {
+    return nullptr;
   }
   // Check for incompatible class change.
   bool is_primitive;
@@ -541,7 +536,15 @@
   }
   if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
     // Incompatible class change.
-    return NULL;
+    return nullptr;
+  }
+  mirror::Class* fields_class = resolved_field->GetDeclaringClass();
+  if (is_static) {
+    // Check class is initialized else fail so that we can contend to initialize the class with
+    // other threads that may be racing to do this.
+    if (UNLIKELY(!fields_class->IsInitialized())) {
+      return nullptr;
+    }
   }
   mirror::Class* referring_class = referrer->GetDeclaringClass();
   if (UNLIKELY(!referring_class->CanAccess(fields_class) ||
@@ -549,11 +552,11 @@
                                                  resolved_field->GetAccessFlags()) ||
                (is_set && resolved_field->IsFinal() && (fields_class != referring_class)))) {
     // Illegal access.
-    return NULL;
+    return nullptr;
   }
   if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
                resolved_field->FieldSize() != expected_size)) {
-    return NULL;
+    return nullptr;
   }
   return resolved_field;
 }
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index f2e2bf7..329c175 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -33,7 +33,7 @@
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
     mirror::Class* declaringClass = method->GetDeclaringClass();
-    if (UNLIKELY(!declaringClass->IsInitializing())) {
+    if (UNLIKELY(!declaringClass->IsInitialized())) {
       self->PushShadowFrame(shadow_frame);
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 6825e78..2da016f 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -213,7 +213,7 @@
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
+    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitialized()) {
       // Ensure static method's class is initialized.
       Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 514d1aa..7a144b6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -484,7 +484,7 @@
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
+    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitialized()) {
       // Ensure static method's class is initialized.
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 224b33e..c0aa43e 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -52,7 +52,7 @@
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
   std::string error_msg;
   std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
-                                                 PROT_READ | PROT_WRITE, false, &error_msg));
+                                                       PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
     return nullptr;
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 0849171..27fb087 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -42,7 +42,6 @@
 class SpaceBitmap {
  public:
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
-
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
 
   // Initialize a space bitmap so that it points to a bitmap large enough to cover a heap at
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 55262f2..656c55b 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -529,7 +529,7 @@
 }
 
 size_t RosAlloc::Free(Thread* self, void* ptr) {
-  ReaderMutexLock rmu(self, bulk_free_lock_);
+  WriterMutexLock rmu(self, bulk_free_lock_);
   return FreeInternal(self, ptr);
 }
 
@@ -1642,7 +1642,7 @@
 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  WriterMutexLock wmu(self, bulk_free_lock_);
+  ReaderMutexLock wmu(self, bulk_free_lock_);
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
@@ -1720,7 +1720,7 @@
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-    WriterMutexLock wmu(self, bulk_free_lock_);
+    ReaderMutexLock wmu(self, bulk_free_lock_);
     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
       MutexLock mu(self, *size_bracket_locks_[idx]);
       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
@@ -1867,7 +1867,7 @@
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
       << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
   MutexLock mu(self, *Locks::thread_list_lock_);
-  WriterMutexLock wmu(self, bulk_free_lock_);
+  ReaderMutexLock wmu(self, bulk_free_lock_);
   std::vector<Run*> runs;
   {
     MutexLock mu(self, lock_);
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a439188..13f61ec 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -45,10 +45,7 @@
     byte magic_num_;  // The magic number used for debugging only.
 
     bool IsFree() const {
-      if (kIsDebugBuild) {
-        return magic_num_ == kMagicNumFree;
-      }
-      return true;
+      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
     }
     size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
       const byte* fpr_base = reinterpret_cast<const byte*>(this);
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index a17c36b..8622fd6 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -31,20 +31,36 @@
 namespace gc {
 namespace collector {
 
+Iteration::Iteration()
+    : duration_ns_(0), timings_("GC iteration timing logger", true, VLOG_IS_ON(heap)) {
+  Reset(kGcCauseBackground, false);  // Reset to some place holder values.
+}
+
+void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) {
+  timings_.Reset();
+  pause_times_.clear();
+  duration_ns_ = 0;
+  clear_soft_references_ = clear_soft_references;
+  gc_cause_ = gc_cause;
+  freed_ = ObjectBytePair();
+  freed_los_ = ObjectBytePair();
+}
+
+uint64_t Iteration::GetEstimatedThroughput() const {
+  // Add 1ms to prevent possible division by 0.
+  return (static_cast<uint64_t>(freed_.bytes) * 1000) / (NsToMs(GetDurationNs()) + 1);
+}
+
 GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
     : heap_(heap),
       name_(name),
-      gc_cause_(kGcCauseForAlloc),
-      clear_soft_references_(false),
-      duration_ns_(0),
-      timings_(name_.c_str(), true, VLOG_IS_ON(heap)),
       pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
       cumulative_timings_(name) {
   ResetCumulativeStatistics();
 }
 
 void GarbageCollector::RegisterPause(uint64_t nano_length) {
-  pause_times_.push_back(nano_length);
+  GetCurrentIteration()->pause_times_.push_back(nano_length);
 }
 
 void GarbageCollector::ResetCumulativeStatistics() {
@@ -59,32 +75,26 @@
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()).c_str());
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
-  timings_.Reset();
-  pause_times_.clear();
-  duration_ns_ = 0;
-  clear_soft_references_ = clear_soft_references;
-  gc_cause_ = gc_cause;
-  // Reset stats.
-  freed_bytes_ = 0;
-  freed_large_object_bytes_ = 0;
-  freed_objects_ = 0;
-  freed_large_objects_ = 0;
+  Iteration* current_iteration = GetCurrentIteration();
+  current_iteration->Reset(gc_cause, clear_soft_references);
   RunPhases();  // Run all the GC phases.
   // Add the current timings to the cumulative timings.
-  cumulative_timings_.AddLogger(timings_);
+  cumulative_timings_.AddLogger(*GetTimings());
   // Update cumulative statistics with how many bytes the GC iteration freed.
-  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
-  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
+  total_freed_objects_ += current_iteration->GetFreedObjects() +
+      current_iteration->GetFreedLargeObjects();
+  total_freed_bytes_ += current_iteration->GetFreedBytes() +
+      current_iteration->GetFreedLargeObjectBytes();
   uint64_t end_time = NanoTime();
-  duration_ns_ = end_time - start_time;
+  current_iteration->SetDurationNs(end_time - start_time);
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // The entire GC was paused, clear the fake pauses which might be in the pause times and add
     // the whole GC duration.
-    pause_times_.clear();
-    RegisterPause(duration_ns_);
+    current_iteration->pause_times_.clear();
+    RegisterPause(current_iteration->GetDurationNs());
   }
-  total_time_ns_ += GetDurationNs();
-  for (uint64_t pause_time : pause_times_) {
+  total_time_ns_ += current_iteration->GetDurationNs();
+  for (uint64_t pause_time : current_iteration->GetPauseTimes()) {
     pause_histogram_.AddValue(pause_time / 1000);
   }
   ATRACE_END();
@@ -125,23 +135,6 @@
   return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1);
 }
 
-uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const {
-  // Add 1ms to prevent possible division by 0.
-  return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1);
-}
-
-void GarbageCollector::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
-  freed_objects_ += freed_objects;
-  freed_bytes_ += freed_bytes;
-  GetHeap()->RecordFree(freed_objects, freed_bytes);
-}
-
-void GarbageCollector::RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes) {
-  freed_large_objects_ += freed_objects;
-  freed_large_object_bytes_ += freed_bytes;
-  GetHeap()->RecordFree(freed_objects, freed_bytes);
-}
-
 void GarbageCollector::ResetMeasurements() {
   cumulative_timings_.Reset();
   pause_histogram_.Reset();
@@ -160,6 +153,23 @@
   Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
+// Returns the current GC iteration and associated info.
+Iteration* GarbageCollector::GetCurrentIteration() {
+  return heap_->GetCurrentGcIteration();
+}
+const Iteration* GarbageCollector::GetCurrentIteration() const {
+  return heap_->GetCurrentGcIteration();
+}
+
+void GarbageCollector::RecordFree(const ObjectBytePair& freed) {
+  GetCurrentIteration()->freed_.Add(freed);
+  heap_->RecordFree(freed.objects, freed.bytes);
+}
+void GarbageCollector::RecordFreeLOS(const ObjectBytePair& freed) {
+  GetCurrentIteration()->freed_los_.Add(freed);
+  heap_->RecordFree(freed.objects, freed.bytes);
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index f4f9dbb..885569e 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -33,6 +33,78 @@
 
 namespace collector {
 
+struct ObjectBytePair {
+  ObjectBytePair(uint64_t num_objects = 0, int64_t num_bytes = 0)
+      : objects(num_objects), bytes(num_bytes) {}
+  void Add(const ObjectBytePair& other) {
+    objects += other.objects;
+    bytes += other.bytes;
+  }
+  // Number of objects which were freed.
+  uint64_t objects;
+  // Freed bytes are signed since the GC can free negative bytes if it promotes objects to a space
+  // which has a larger allocation size.
+  int64_t bytes;
+};
+
+// Information related to a single garbage collector iteration. Since we only ever have one GC
+// running at any given time, we can have a single iteration info.
+class Iteration {
+ public:
+  Iteration();
+  // Returns how long the mutators were paused in nanoseconds.
+  const std::vector<uint64_t>& GetPauseTimes() const {
+    return pause_times_;
+  }
+  TimingLogger* GetTimings() {
+    return &timings_;
+  }
+  // Returns how long the GC took to complete in nanoseconds.
+  uint64_t GetDurationNs() const {
+    return duration_ns_;
+  }
+  int64_t GetFreedBytes() const {
+    return freed_.bytes;
+  }
+  int64_t GetFreedLargeObjectBytes() const {
+    return freed_los_.bytes;
+  }
+  uint64_t GetFreedObjects() const {
+    return freed_.objects;
+  }
+  uint64_t GetFreedLargeObjects() const {
+    return freed_los_.objects;
+  }
+  void Reset(GcCause gc_cause, bool clear_soft_references);
+  // Returns the estimated throughput of the iteration.
+  uint64_t GetEstimatedThroughput() const;
+  bool GetClearSoftReferences() const {
+    return clear_soft_references_;
+  }
+  void SetClearSoftReferences(bool clear_soft_references) {
+    clear_soft_references_ = clear_soft_references;
+  }
+  GcCause GetGcCause() const {
+    return gc_cause_;
+  }
+
+ private:
+  void SetDurationNs(uint64_t duration) {
+    duration_ns_ = duration;
+  }
+
+  GcCause gc_cause_;
+  bool clear_soft_references_;
+  uint64_t duration_ns_;
+  TimingLogger timings_;
+  ObjectBytePair freed_;
+  ObjectBytePair freed_los_;
+  std::vector<uint64_t> pause_times_;
+
+  friend class GarbageCollector;
+  DISALLOW_COPY_AND_ASSIGN(Iteration);
+};
+
 class GarbageCollector {
  public:
   class SCOPED_LOCKABLE ScopedPause {
@@ -62,22 +134,7 @@
   Heap* GetHeap() const {
     return heap_;
   }
-
-  // Returns how long the mutators were paused in nanoseconds.
-  const std::vector<uint64_t>& GetPauseTimes() const {
-    return pause_times_;
-  }
-
-  // Returns how long the GC took to complete in nanoseconds.
-  uint64_t GetDurationNs() const {
-    return duration_ns_;
-  }
-
   void RegisterPause(uint64_t nano_length);
-
-  TimingLogger& GetTimings() {
-    return timings_;
-  }
   const CumulativeLogger& GetCumulativeTimings() const {
     return cumulative_timings_;
   }
@@ -87,52 +144,36 @@
   // Swap the live and mark bitmaps of spaces that are active for the collector. For partial GC,
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
   void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  int64_t GetFreedBytes() const {
-    return freed_bytes_;
-  }
-
-  int64_t GetFreedLargeObjectBytes() const {
-    return freed_large_object_bytes_;
-  }
-
-  uint64_t GetFreedObjects() const {
-    return freed_objects_;
-  }
-
-  uint64_t GetFreedLargeObjects() const {
-    return freed_large_objects_;
-  }
-
   uint64_t GetTotalPausedTimeNs() const {
     return pause_histogram_.AdjustedSum();
   }
-
   int64_t GetTotalFreedBytes() const {
     return total_freed_bytes_;
   }
-
   uint64_t GetTotalFreedObjects() const {
     return total_freed_objects_;
   }
-
   const Histogram<uint64_t>& GetPauseHistogram() const {
     return pause_histogram_;
   }
-
   // Reset the cumulative timings and pause histogram.
   void ResetMeasurements();
-
   // Returns the estimated throughput in bytes / second.
   uint64_t GetEstimatedMeanThroughput() const;
-
-  // Returns the estimated throughput of the last GC iteration.
-  uint64_t GetEstimatedLastIterationThroughput() const;
-
   // Returns how many GC iterations have been run.
-  size_t GetIterations() const {
+  size_t NumberOfIterations() const {
     return GetCumulativeTimings().GetIterations();
   }
+  // Returns the current GC iteration and associated info.
+  Iteration* GetCurrentIteration();
+  const Iteration* GetCurrentIteration() const;
+  TimingLogger* GetTimings() {
+    return &GetCurrentIteration()->timings_;
+  }
+  // Record a free of normal objects.
+  void RecordFree(const ObjectBytePair& freed);
+  // Record a free of large objects.
+  void RecordFreeLOS(const ObjectBytePair& freed);
 
  protected:
   // Run all of the GC phases.
@@ -141,40 +182,17 @@
   // Revoke all the thread-local buffers.
   virtual void RevokeAllThreadLocalBuffers() = 0;
 
-  // Record that you have freed some objects or large objects, calls Heap::RecordFree.
-  // TODO: These are not thread safe, add a lock if we get parallel sweeping.
-  void RecordFree(uint64_t freed_objects, int64_t freed_bytes);
-  void RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes);
-
   static constexpr size_t kPauseBucketSize = 500;
   static constexpr size_t kPauseBucketCount = 32;
 
   Heap* const heap_;
-
   std::string name_;
-
-  GcCause gc_cause_;
-  bool clear_soft_references_;
-
-  uint64_t duration_ns_;
-  TimingLogger timings_;
-
   // Cumulative statistics.
   Histogram<uint64_t> pause_histogram_;
   uint64_t total_time_ns_;
   uint64_t total_freed_objects_;
   int64_t total_freed_bytes_;
-
-  // Single GC statitstics, freed bytes are signed since the GC can free negative bytes if it
-  // promotes objects to a space which has a larger allocation size.
-  int64_t freed_bytes_;
-  int64_t freed_large_object_bytes_;
-  uint64_t freed_objects_;
-  uint64_t freed_large_objects_;
-
   CumulativeLogger cumulative_timings_;
-
-  std::vector<uint64_t> pause_times_;
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
new file mode 100644
index 0000000..ebd1738
--- /dev/null
+++ b/runtime/gc/collector/mark_compact.cc
@@ -0,0 +1,628 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mark_compact.h"
+
+#include "base/logging.h"
+#include "base/mutex-inl.h"
+#include "base/timing_logger.h"
+#include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table.h"
+#include "gc/accounting/remembered_set.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/reference_processor.h"
+#include "gc/space/bump_pointer_space.h"
+#include "gc/space/bump_pointer_space-inl.h"
+#include "gc/space/image_space.h"
+#include "gc/space/large_object_space.h"
+#include "gc/space/space-inl.h"
+#include "indirect_reference_table.h"
+#include "intern_table.h"
+#include "jni_internal.h"
+#include "mark_sweep-inl.h"
+#include "monitor.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
+#include "mirror/reference-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
+#include "runtime.h"
+#include "stack.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+using ::art::mirror::Class;
+using ::art::mirror::Object;
+
+namespace art {
+namespace gc {
+namespace collector {
+
+void MarkCompact::BindBitmaps() {
+  GetTimings()->StartSplit("BindBitmaps");
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Mark all of the spaces we never collect as immune.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+    }
+  }
+  GetTimings()->EndSplit();
+}
+
+MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
+    : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "mark compact"),
+      space_(nullptr), collector_name_(name_) {
+}
+
+void MarkCompact::RunPhases() {
+  Thread* self = Thread::Current();
+  InitializePhase();
+  CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
+  {
+    ScopedPause pause(this);
+    GetHeap()->PreGcVerificationPaused(this);
+    GetHeap()->PrePauseRosAllocVerification(this);
+    MarkingPhase();
+    ReclaimPhase();
+  }
+  GetHeap()->PostGcVerification(this);
+  FinishPhase();
+}
+
+void MarkCompact::ForwardObject(mirror::Object* obj) {
+  const size_t alloc_size = RoundUp(obj->SizeOf(), space::BumpPointerSpace::kAlignment);
+  LockWord lock_word = obj->GetLockWord(false);
+  // If we have a non empty lock word, store it and restore it later.
+  if (lock_word.GetValue() != LockWord().GetValue()) {
+    // Set the bit in the bitmap so that we know to restore it later.
+    objects_with_lockword_->Set(obj);
+    lock_words_to_restore_.push_back(lock_word);
+  }
+  obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(bump_pointer_)),
+                   false);
+  bump_pointer_ += alloc_size;
+  ++live_objects_in_space_;
+}
+
+class CalculateObjectForwardingAddressVisitor {
+ public:
+  explicit CalculateObjectForwardingAddressVisitor(MarkCompact* collector)
+      : collector_(collector) {}
+  void operator()(mirror::Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_,
+                                                                      Locks::heap_bitmap_lock_) {
+    DCHECK_ALIGNED(obj, space::BumpPointerSpace::kAlignment);
+    DCHECK(collector_->IsMarked(obj));
+    collector_->ForwardObject(obj);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::CalculateObjectForwardingAddresses() {
+  GetTimings()->NewSplit(__FUNCTION__);
+  // The bump pointer in the space where the next forwarding address will be.
+  bump_pointer_ = reinterpret_cast<byte*>(space_->Begin());
+  // Visit all the marked objects in the bitmap.
+  CalculateObjectForwardingAddressVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+}
+
+void MarkCompact::InitializePhase() {
+  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
+  mark_stack_ = heap_->GetMarkStack();
+  DCHECK(mark_stack_ != nullptr);
+  immune_region_.Reset();
+  CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_;
+  // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  mark_bitmap_ = heap_->GetMarkBitmap();
+  live_objects_in_space_ = 0;
+}
+
+void MarkCompact::ProcessReferences(Thread* self) {
+  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  heap_->GetReferenceProcessor()->ProcessReferences(
+      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
+      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
+}
+
+class BitmapSetSlowPathVisitor {
+ public:
+  void operator()(const mirror::Object* obj) const {
+    // Marking a large object, make sure it's aligned as a sanity check.
+    if (!IsAligned<kPageSize>(obj)) {
+      Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
+      LOG(FATAL) << obj;
+    }
+  }
+};
+
+inline void MarkCompact::MarkObject(mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+  if (kUseBakerOrBrooksReadBarrier) {
+    // Verify all the objects have the correct forward pointer installed.
+    obj->AssertReadBarrierPointer();
+  }
+  if (immune_region_.ContainsObject(obj)) {
+    return;
+  }
+  if (objects_before_forwarding_->HasAddress(obj)) {
+    if (!objects_before_forwarding_->Set(obj)) {
+      MarkStackPush(obj);  // This object was not previously marked.
+    }
+  } else {
+    DCHECK(!space_->HasAddress(obj));
+    BitmapSetSlowPathVisitor visitor;
+    if (!mark_bitmap_->Set(obj, visitor)) {
+      // This object was not previously marked.
+      MarkStackPush(obj);
+    }
+  }
+}
+
+void MarkCompact::MarkingPhase() {
+  Thread* self = Thread::Current();
+  // Bitmap which describes which objects we have to move.
+  objects_before_forwarding_.reset(accounting::ContinuousSpaceBitmap::Create(
+      "objects before forwarding", space_->Begin(), space_->Size()));
+  // Bitmap which describes which lock words we need to restore.
+  objects_with_lockword_.reset(accounting::ContinuousSpaceBitmap::Create(
+      "objects with lock words", space_->Begin(), space_->Size()));
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self));
+  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
+  // Assume the cleared space is already empty.
+  BindBitmaps();
+  // Process dirty cards and add dirty cards to mod-union tables.
+  heap_->ProcessCards(GetTimings(), false);
+  // Clear the whole card table since we cannot get any additional dirty cards during the
+  // paused GC. This saves memory but only works for pause the world collectors.
+  GetTimings()->NewSplit("ClearCardTable");
+  heap_->GetCardTable()->ClearCardTable();
+  // Need to do this before the checkpoint since we don't want any threads to add references to
+  // the live stack during the recursive mark.
+  GetTimings()->NewSplit("SwapStacks");
+  if (kUseThreadLocalAllocationStack) {
+    heap_->RevokeAllThreadLocalAllocationStacks(self);
+  }
+  heap_->SwapStacks(self);
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    MarkRoots();
+    // Mark roots of immune spaces.
+    UpdateAndMarkModUnion();
+    // Recursively mark remaining objects.
+    MarkReachableObjects();
+  }
+  ProcessReferences(self);
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
+  // before they are properly counted.
+  RevokeAllThreadLocalBuffers();
+  GetTimings()->StartSplit("PreSweepingGcVerification");
+  // Disabled due to an issue where we have objects in the bump pointer space which reference dead
+  // objects.
+  // heap_->PreSweepingGcVerification(this);
+  GetTimings()->EndSplit();
+}
+
+void MarkCompact::UpdateAndMarkModUnion() {
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    // If the space is immune then we need to mark the references to other spaces.
+    if (immune_region_.ContainsSpace(space)) {
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      if (table != nullptr) {
+        // TODO: Improve naming.
+        TimingLogger::ScopedSplit split(
+            space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+                                     "UpdateAndMarkImageModUnionTable", GetTimings());
+        table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
+      }
+    }
+  }
+}
+
+void MarkCompact::MarkReachableObjects() {
+  GetTimings()->StartSplit("MarkStackAsLive");
+  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+  heap_->MarkAllocStackAsLive(live_stack);
+  live_stack->Reset();
+  // Recursively process the mark stack.
+  ProcessMarkStack();
+  GetTimings()->EndSplit();
+}
+
+void MarkCompact::ReclaimPhase() {
+  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Reclaim unmarked objects.
+  Sweep(false);
+  // Swap the live and mark bitmaps for each space which we modified. This is an
+  // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
+  // bitmaps.
+  GetTimings()->StartSplit("SwapBitmapsAndUnBindBitmaps");
+  SwapBitmaps();
+  GetHeap()->UnBindBitmaps();  // Unbind the live and mark bitmaps.
+  Compact();
+  GetTimings()->EndSplit();
+}
+
+void MarkCompact::ResizeMarkStack(size_t new_size) {
+  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  CHECK_LE(mark_stack_->Size(), new_size);
+  mark_stack_->Resize(new_size);
+  for (const auto& obj : temp) {
+    mark_stack_->PushBack(obj);
+  }
+}
+
+inline void MarkCompact::MarkStackPush(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    ResizeMarkStack(mark_stack_->Capacity() * 2);
+  }
+  // The object must be pushed on to the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
+void MarkCompact::ProcessMarkStackCallback(void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->ProcessMarkStack();
+}
+
+mirror::Object* MarkCompact::MarkObjectCallback(mirror::Object* root, void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(root);
+  return root;
+}
+
+void MarkCompact::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                          void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(obj_ptr->AsMirrorPtr());
+}
+
+void MarkCompact::DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
+                                               void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->DelayReferenceReferent(klass, ref);
+}
+
+void MarkCompact::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
+                                   RootType /*root_type*/) {
+  reinterpret_cast<MarkCompact*>(arg)->MarkObject(*root);
+}
+
+void MarkCompact::UpdateRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
+                                     RootType /*root_type*/) {
+  mirror::Object* obj = *root;
+  mirror::Object* new_obj = reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
+  if (obj != new_obj) {
+    *root = new_obj;
+    DCHECK(new_obj != nullptr);
+  }
+}
+
+class UpdateObjectReferencesVisitor {
+ public:
+  explicit UpdateObjectReferencesVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+          EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    collector_->UpdateObjectReferences(obj);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::UpdateReferences() {
+  GetTimings()->NewSplit(__FUNCTION__);
+  Runtime* runtime = Runtime::Current();
+  // Update roots.
+  runtime->VisitRoots(UpdateRootCallback, this);
+  // Update object references in mod union tables and spaces.
+  for (const auto& space : heap_->GetContinuousSpaces()) {
+    // If the space has a mod union table, update the references through the table.
+    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+    if (table != nullptr) {
+      // TODO: Improve naming.
+      TimingLogger::ScopedSplit split(
+          space->IsZygoteSpace() ? "UpdateZygoteModUnionTableReferences" :
+                                   "UpdateImageModUnionTableReferences",
+                                   GetTimings());
+      table->UpdateAndMarkReferences(&UpdateHeapReferenceCallback, this);
+    } else {
+      // No mod union table, so we need to update the references of every object which is
+      // marked in the space's live bitmap.
+      accounting::ContinuousSpaceBitmap* bitmap = space->GetLiveBitmap();
+      if (bitmap != nullptr) {
+        UpdateObjectReferencesVisitor visitor(this);
+        bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                 reinterpret_cast<uintptr_t>(space->End()),
+                                 visitor);
+      }
+    }
+  }
+  CHECK(!kMovingClasses)
+      << "Didn't update large object classes since they are assumed to not move.";
+  // Update the system weaks, these should already have been swept.
+  runtime->SweepSystemWeaks(&MarkedForwardingAddressCallback, this);
+  // Update the objects in the bump pointer space last, these objects don't have a bitmap.
+  UpdateObjectReferencesVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+  // Update the reference processor cleared list.
+  heap_->GetReferenceProcessor()->UpdateRoots(&MarkedForwardingAddressCallback, this);
+}
+
+void MarkCompact::Compact() {
+  GetTimings()->NewSplit(__FUNCTION__);
+  CalculateObjectForwardingAddresses();
+  UpdateReferences();
+  MoveObjects();
+  // Account for the objects and bytes freed from the compacted space.
+  int64_t objects_freed = space_->GetObjectsAllocated() - live_objects_in_space_;
+  int64_t bytes_freed = reinterpret_cast<int64_t>(space_->End()) -
+      reinterpret_cast<int64_t>(bump_pointer_);
+  GetTimings()->NewSplit("RecordFree");
+  space_->RecordFree(objects_freed, bytes_freed);
+  RecordFree(ObjectBytePair(objects_freed, bytes_freed));
+  space_->SetEnd(bump_pointer_);
+  // Need to zero out the memory we freed. TODO: Use madvise for pages.
+  memset(bump_pointer_, 0, bytes_freed);
+}
+
+// Marks all objects in the root set.
+void MarkCompact::MarkRoots() {
+  GetTimings()->NewSplit("MarkRoots");
+  Runtime::Current()->VisitRoots(MarkRootCallback, this);
+}
+
+mirror::Object* MarkCompact::MarkedForwardingAddressCallback(mirror::Object* obj, void* arg) {
+  return reinterpret_cast<MarkCompact*>(arg)->GetMarkedForwardAddress(obj);
+}
+
+inline void MarkCompact::UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference) {
+  mirror::Object* obj = reference->AsMirrorPtr();
+  if (obj != nullptr) {
+    mirror::Object* new_obj = GetMarkedForwardAddress(obj);
+    if (obj != new_obj) {
+      DCHECK(new_obj != nullptr);
+      reference->Assign(new_obj);
+    }
+  }
+}
+
+void MarkCompact::UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
+                                              void* arg) {
+  reinterpret_cast<MarkCompact*>(arg)->UpdateHeapReference(reference);
+}
+
+class UpdateReferenceVisitor {
+ public:
+  explicit UpdateReferenceVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+      ALWAYS_INLINE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->UpdateHeapReference(obj->GetFieldObjectReferenceAddr<kVerifyNone>(offset));
+  }
+
+  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->UpdateHeapReference(
+        ref->GetFieldObjectReferenceAddr<kVerifyNone>(mirror::Reference::ReferentOffset()));
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::UpdateObjectReferences(mirror::Object* obj) {
+  UpdateReferenceVisitor visitor(this);
+  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+}
+
+inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) const {
+  DCHECK(obj != nullptr);
+  if (objects_before_forwarding_->HasAddress(obj)) {
+    DCHECK(objects_before_forwarding_->Test(obj));
+    mirror::Object* ret =
+        reinterpret_cast<mirror::Object*>(obj->GetLockWord(false).ForwardingAddress());
+    DCHECK(ret != nullptr);
+    return ret;
+  }
+  DCHECK(!space_->HasAddress(obj));
+  DCHECK(IsMarked(obj));
+  return obj;
+}
+
+inline bool MarkCompact::IsMarked(const Object* object) const {
+  if (immune_region_.ContainsObject(object)) {
+    return true;
+  }
+  if (objects_before_forwarding_->HasAddress(object)) {
+    return objects_before_forwarding_->Test(object);
+  }
+  return mark_bitmap_->Test(object);
+}
+
+mirror::Object* MarkCompact::IsMarkedCallback(mirror::Object* object, void* arg) {
+  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(object) ? object : nullptr;
+}
+
+bool MarkCompact::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
+                                              void* arg) {
+  // Side effect free since we call this before ever moving objects.
+  return reinterpret_cast<MarkCompact*>(arg)->IsMarked(ref_ptr->AsMirrorPtr());
+}
+
+void MarkCompact::SweepSystemWeaks() {
+  GetTimings()->StartSplit("SweepSystemWeaks");
+  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
+  GetTimings()->EndSplit();
+}
+
+bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
+  return space != space_ && !immune_region_.ContainsSpace(space);
+}
+
+class MoveObjectVisitor {
+ public:
+  explicit MoveObjectVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+          EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+      collector_->MoveObject(obj, obj->SizeOf());
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+void MarkCompact::MoveObject(mirror::Object* obj, size_t len) {
+  // Look at the forwarding address stored in the lock word to know where to copy.
+  DCHECK(space_->HasAddress(obj)) << obj;
+  uintptr_t dest_addr = obj->GetLockWord(false).ForwardingAddress();
+  mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest_addr);
+  DCHECK(space_->HasAddress(dest_obj)) << dest_obj;
+  // Use memmove since there may be overlap.
+  memmove(reinterpret_cast<void*>(dest_addr), reinterpret_cast<const void*>(obj), len);
+  // Restore the saved lock word if needed.
+  LockWord lock_word;
+  if (UNLIKELY(objects_with_lockword_->Test(obj))) {
+    lock_word = lock_words_to_restore_.front();
+    lock_words_to_restore_.pop_front();
+  }
+  dest_obj->SetLockWord(lock_word, false);
+}
+
+void MarkCompact::MoveObjects() {
+  GetTimings()->NewSplit(__FUNCTION__);
+  // Move the objects in the before forwarding bitmap.
+  MoveObjectVisitor visitor(this);
+  objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
+                                               reinterpret_cast<uintptr_t>(space_->End()),
+                                               visitor);
+  CHECK(lock_words_to_restore_.empty());
+}
+
+void MarkCompact::Sweep(bool swap_bitmaps) {
+  DCHECK(mark_stack_->IsEmpty());
+  TimingLogger::ScopedSplit split("Sweep", GetTimings());
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (!ShouldSweepSpace(alloc_space)) {
+        continue;
+      }
+      TimingLogger::ScopedSplit split(
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
+      RecordFree(alloc_space->Sweep(swap_bitmaps));
+    }
+  }
+  SweepLargeObjects(swap_bitmaps);
+}
+
+void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
+  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void MarkCompact::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
+                                                         &HeapReferenceMarkedCallback, this);
+}
+
+class MarkCompactMarkObjectVisitor {
+ public:
+  explicit MarkCompactMarkObjectVisitor(MarkCompact* collector) : collector_(collector) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    // Object was already verified when we scanned it.
+    collector_->MarkObject(obj->GetFieldObject<mirror::Object, kVerifyNone>(offset));
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    collector_->DelayReferenceReferent(klass, ref);
+  }
+
+ private:
+  MarkCompact* const collector_;
+};
+
+// Visit all of the references of an object and mark the objects they refer to.
+void MarkCompact::ScanObject(Object* obj) {
+  MarkCompactMarkObjectVisitor visitor(this);
+  obj->VisitReferences<kMovingClasses>(visitor, visitor);
+}
+
+// Scan anything that's on the mark stack.
+void MarkCompact::ProcessMarkStack() {
+  GetTimings()->StartSplit("ProcessMarkStack");
+  while (!mark_stack_->IsEmpty()) {
+    Object* obj = mark_stack_->PopBack();
+    DCHECK(obj != nullptr);
+    ScanObject(obj);
+  }
+  GetTimings()->EndSplit();
+}
+
+void MarkCompact::SetSpace(space::BumpPointerSpace* space) {
+  DCHECK(space != nullptr);
+  space_ = space;
+}
+
+void MarkCompact::FinishPhase() {
+  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
+  space_ = nullptr;
+  CHECK(mark_stack_->IsEmpty());
+  mark_stack_->Reset();
+  // Clear all of the spaces' mark bitmaps.
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  heap_->ClearMarkedObjects();
+  // Release our bitmaps.
+  objects_before_forwarding_.reset(nullptr);
+  objects_with_lockword_.reset(nullptr);
+}
+
+void MarkCompact::RevokeAllThreadLocalBuffers() {
+  GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  GetHeap()->RevokeAllThreadLocalBuffers();
+  GetTimings()->EndSplit();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
new file mode 100644
index 0000000..25cfe0f
--- /dev/null
+++ b/runtime/gc/collector/mark_compact.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
+#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
+
+#include <deque>
+#include <memory>  // For unique_ptr.
+
+#include "atomic.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "garbage_collector.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "immune_region.h"
+#include "lock_word.h"
+#include "object_callbacks.h"
+#include "offsets.h"
+
+namespace art {
+
+class Thread;
+
+namespace mirror {
+  class Class;
+  class Object;
+}  // namespace mirror
+
+namespace gc {
+
+class Heap;
+
+namespace accounting {
+  template <typename T> class AtomicStack;
+  typedef AtomicStack<mirror::Object*> ObjectStack;
+}  // namespace accounting
+
+namespace space {
+  class ContinuousMemMapAllocSpace;
+  class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+// Mark-compact garbage collector. Marks the live objects, then compacts the bump pointer space
+// given to SetSpace() in three passes: compute forwarding addresses, update references, move.
+class MarkCompact : public GarbageCollector {
+ public:
+  explicit MarkCompact(Heap* heap, const std::string& name_prefix = "");
+  ~MarkCompact() {}
+
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  void InitializePhase();
+  void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkReachableObjects()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  virtual GcType GetGcType() const OVERRIDE {
+    return kGcTypePartial;
+  }
+  virtual CollectorType GetCollectorType() const OVERRIDE {
+    return kCollectorTypeMC;
+  }
+
+  // Sets the bump pointer space which we will be collecting.
+  void SetSpace(space::BumpPointerSpace* space);
+
+  // Initializes internal structures.
+  void Init();
+
+  // Find the default mark bitmap.
+  void FindDefaultMarkBitmap();
+
+  void ScanObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Marks the root set at the start of a garbage collection.
+  void MarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, i.e.
+  // the image. Mark that portion of the heap as immune.
+  void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+
+  void UnBindBitmaps()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Sweeps unmarked objects to complete the garbage collection.
+  void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweeps unmarked large objects to complete the garbage collection.
+  void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void SweepSystemWeaks()
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t /*tid*/,
+                               RootType /*root_type*/)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static bool HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Object>* ref_ptr,
+                                          void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static void ProcessMarkStackCallback(void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  static void DelayReferenceReferentCallback(mirror::Class* klass, mirror::Reference* ref,
+                                             void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+ protected:
+  // Returns null if the object is not marked, otherwise returns the forwarding address (same as
+  // object for non movable things).
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Marks a large object. NOTE(review): the meaning of the bool result is not visible here.
+  bool MarkLargeObject(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Resizes the mark stack to new_size.
+  void ResizeMarkStack(size_t new_size);
+
+  // Returns true if we should sweep the space.
+  bool ShouldSweepSpace(space::ContinuousSpace* space) const;
+
+  // Push an object onto the mark stack.
+  void MarkStackPush(mirror::Object* obj);
+
+  void UpdateAndMarkModUnion()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Recursively blackens objects on the mark stack.
+  void ProcessMarkStack()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  // 3 pass mark compact approach.
+  void Compact() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  // Calculate the forwarding address of objects marked as "live" in the objects_before_forwarding
+  // bitmap.
+  void CalculateObjectForwardingAddresses()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  // Update the references of objects by using the forwarding addresses.
+  void UpdateReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  static void UpdateRootCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
+                                 RootType /*root_type*/)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  // Move objects and restore lock words.
+  void MoveObjects() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Move a single object to its forward address.
+  void MoveObject(mirror::Object* obj, size_t len) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Mark a single object.
+  void MarkObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
+                                                                Locks::mutator_lock_);
+  bool IsMarked(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void ForwardObject(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
+                                                                   Locks::mutator_lock_);
+  // Update a single heap reference.
+  void UpdateHeapReference(mirror::HeapReference<mirror::Object>* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void UpdateHeapReferenceCallback(mirror::HeapReference<mirror::Object>* reference,
+                                          void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Update all of the references of a single object.
+  void UpdateObjectReferences(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
+  accounting::ObjectStack* mark_stack_;
+
+  // Immune region, every object inside the immune region is assumed to be marked.
+  ImmuneRegion immune_region_;
+
+  // Bump pointer space which we are collecting.
+  space::BumpPointerSpace* space_;
+  // Cached mark bitmap as an optimization.
+  accounting::HeapBitmap* mark_bitmap_;
+
+  // The name of the collector.
+  std::string collector_name_;
+
+  // The bump pointer in the space where the next forwarding address will be.
+  byte* bump_pointer_;
+  // How many live objects we have in the space.
+  size_t live_objects_in_space_;
+
+  // Bitmap which describes which objects we have to move, need to do / 2 so that we can handle
+  // objects which are only 8 bytes.
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> objects_before_forwarding_;
+  // Bitmap which describes which lock words we need to restore.
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> objects_with_lockword_;
+  // Which lock words we need to restore as we are moving objects.
+  std::deque<LockWord> lock_words_to_restore_;
+
+ private:
+  friend class BitmapSetSlowPathVisitor;
+  friend class CalculateObjectForwardingAddressVisitor;
+  friend class MarkCompactMarkObjectVisitor;
+  friend class MoveObjectVisitor;
+  friend class UpdateObjectReferencesVisitor;
+  friend class UpdateReferenceVisitor;
+  DISALLOW_COPY_AND_ASSIGN(MarkCompact);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index c72913a..d08796b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -43,10 +43,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
-using ::art::mirror::ArtField;
-using ::art::mirror::Class;
 using ::art::mirror::Object;
-using ::art::mirror::ObjectArray;
 
 namespace art {
 namespace gc {
@@ -84,7 +81,7 @@
 static constexpr bool kRevokeRosAllocThreadLocalBuffersAtCheckpoint = true;
 
 void MarkSweep::BindBitmaps() {
-  timings_.StartSplit("BindBitmaps");
+  GetTimings()->StartSplit("BindBitmaps");
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -92,7 +89,7 @@
       CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
     }
   }
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
@@ -113,7 +110,7 @@
 }
 
 void MarkSweep::InitializePhase() {
-  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
@@ -135,9 +132,9 @@
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     mark_bitmap_ = heap_->GetMarkBitmap();
   }
-  if (!clear_soft_references_) {
+  if (!GetCurrentIteration()->GetClearSoftReferences()) {
     // Always clear soft references if a non-sticky collection.
-    clear_soft_references_ = GetGcType() != collector::kGcTypeSticky;
+    GetCurrentIteration()->SetClearSoftReferences(GetGcType() != collector::kGcTypeSticky);
   }
 }
 
@@ -173,15 +170,15 @@
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback,
-      &ProcessMarkStackCallback, this);
+      true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
+      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
 void MarkSweep::PausePhase() {
-  TimingLogger::ScopedSplit split("(Paused)PausePhase", &timings_);
+  TimingLogger::ScopedSplit split("(Paused)PausePhase", GetTimings());
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   if (IsConcurrent()) {
@@ -193,7 +190,7 @@
     RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty);
   }
   {
-    TimingLogger::ScopedSplit split("SwapStacks", &timings_);
+    TimingLogger::ScopedSplit split("SwapStacks", GetTimings());
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap_->SwapStacks(self);
     live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
@@ -201,9 +198,9 @@
     // stacks and don't want anybody to allocate into the live stack.
     RevokeAllThreadLocalAllocationStacks(self);
   }
-  timings_.StartSplit("PreSweepingGcVerification");
+  GetTimings()->StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
   // Disallow new system weaks to prevent a race which occurs when someone adds a new system
   // weak before we sweep them. Since this new system weak may not be marked, the GC may
   // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
@@ -220,7 +217,7 @@
     Thread* self = Thread::Current();
     CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     // Process dirty cards and add dirty cards to mod union tables, also ages cards.
-    heap_->ProcessCards(timings_, false);
+    heap_->ProcessCards(GetTimings(), false);
     // The checkpoint root marking is required to avoid a race condition which occurs if the
     // following happens during a reference write:
     // 1. mutator dirties the card (write barrier)
@@ -246,22 +243,19 @@
 
 void MarkSweep::RevokeAllThreadLocalAllocationStacks(Thread* self) {
   if (kUseThreadLocalAllocationStack) {
-    timings_.NewSplit("RevokeAllThreadLocalAllocationStacks");
+    GetTimings()->NewSplit("RevokeAllThreadLocalAllocationStacks");
     Locks::mutator_lock_->AssertExclusiveHeld(self);
     heap_->RevokeAllThreadLocalAllocationStacks(self);
   }
 }
 
 void MarkSweep::MarkingPhase() {
-  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
   Thread* self = Thread::Current();
-
   BindBitmaps();
   FindDefaultSpaceBitmap();
-
   // Process dirty cards and add dirty cards to mod union tables.
-  heap_->ProcessCards(timings_, false);
-
+  heap_->ProcessCards(GetTimings(), false);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   MarkRoots(self);
   MarkReachableObjects();
@@ -274,7 +268,7 @@
     if (immune_region_.ContainsSpace(space)) {
       const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
           "UpdateAndMarkImageModUnionTable";
-      TimingLogger::ScopedSplit split(name, &timings_);
+      TimingLogger::ScopedSplit split(name, GetTimings());
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
       mod_union_table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
@@ -289,7 +283,7 @@
 }
 
 void MarkSweep::ReclaimPhase() {
-  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
   Thread* self = Thread::Current();
   // Process the references concurrently.
   ProcessReferences(self);
@@ -304,18 +298,18 @@
     // Swap the live and mark bitmaps for each space which we modified space. This is an
     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
     // bitmaps.
-    timings_.StartSplit("SwapBitmaps");
+    GetTimings()->StartSplit("SwapBitmaps");
     SwapBitmaps();
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
 
     // Unbind the live and mark bitmaps.
-    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    TimingLogger::ScopedSplit split("UnBindBitmaps", GetTimings());
     GetHeap()->UnBindBitmaps();
   }
 }
 
 void MarkSweep::FindDefaultSpaceBitmap() {
-  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
+  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", GetTimings());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap();
     // We want to have the main space instead of non moving if possible.
@@ -514,9 +508,9 @@
 void MarkSweep::MarkRoots(Thread* self) {
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
-    timings_.StartSplit("MarkRoots");
+    GetTimings()->StartSplit("MarkRoots");
     Runtime::Current()->VisitRoots(MarkRootCallback, this);
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
     MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
@@ -528,16 +522,16 @@
 }
 
 void MarkSweep::MarkNonThreadRoots() {
-  timings_.StartSplit("MarkNonThreadRoots");
+  GetTimings()->StartSplit("MarkNonThreadRoots");
   Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
-  timings_.StartSplit("MarkConcurrentRoots");
+  GetTimings()->StartSplit("MarkConcurrentRoots");
   // Visit all runtime roots and clear dirty flags.
   Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 class ScanObjectVisitor {
@@ -758,7 +752,7 @@
     Thread* self = Thread::Current();
     // Can't have a different split for each space since multiple spaces can have their cards being
     // scanned at the same time.
-    timings_.StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects");
+    GetTimings()->StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects");
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
     Object** mark_stack_begin = mark_stack_->Begin();
     Object** mark_stack_end = mark_stack_->End();
@@ -811,28 +805,28 @@
     thread_pool->StartWorkers(self);
     thread_pool->Wait(self, true, true);
     thread_pool->StopWorkers(self);
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
   } else {
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
       if (space->GetMarkBitmap() != nullptr) {
         // Image spaces are handled properly since live == marked for them.
         switch (space->GetGcRetentionPolicy()) {
           case space::kGcRetentionPolicyNeverCollect:
-            timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
+            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
                 "ScanGrayImageSpaceObjects");
             break;
           case space::kGcRetentionPolicyFullCollect:
-            timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
+            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
                 "ScanGrayZygoteSpaceObjects");
             break;
           case space::kGcRetentionPolicyAlwaysCollect:
-            timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
+            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
                 "ScanGrayAllocSpaceObjects");
             break;
           }
         ScanObjectVisitor visitor(this);
         card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
-        timings_.EndSplit();
+        GetTimings()->EndSplit();
       }
     }
   }
@@ -869,7 +863,7 @@
 // Populates the mark stack based on the set of marked objects and
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
-  TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
+  TimingLogger::ScopedSplit split("RecursiveMark", GetTimings());
   // RecursiveMark will build the lists of known instances of the Reference classes. See
   // DelayReferenceReferent for details.
   if (kUseRecursiveMark) {
@@ -937,24 +931,24 @@
 
 void MarkSweep::ReMarkRoots() {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  timings_.StartSplit("(Paused)ReMarkRoots");
+  GetTimings()->StartSplit("(Paused)ReMarkRoots");
   Runtime::Current()->VisitRoots(
       MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
                                                           kVisitRootFlagStopLoggingNewRoots |
                                                           kVisitRootFlagClearRootLog));
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
   if (kVerifyRootsMarked) {
-    timings_.StartSplit("(Paused)VerifyRoots");
+    GetTimings()->StartSplit("(Paused)VerifyRoots");
     Runtime::Current()->VisitRoots(VerifyRootMarked, this);
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
   }
 }
 
 void MarkSweep::SweepSystemWeaks(Thread* self) {
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  timings_.StartSplit("SweepSystemWeaks");
+  GetTimings()->StartSplit("SweepSystemWeaks");
   Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) {
@@ -1012,7 +1006,7 @@
 void MarkSweep::MarkRootsCheckpoint(Thread* self,
                                     bool revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
   CheckpointMarkThreadRoots check_point(this, revoke_ros_alloc_thread_local_buffers_at_checkpoint);
-  timings_.StartSplit("MarkRootsCheckpoint");
+  GetTimings()->StartSplit("MarkRootsCheckpoint");
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   // Request the check point is run on all threads returning a count of the threads that must
   // run through the barrier including self.
@@ -1027,19 +1021,17 @@
   }
   Locks::mutator_lock_->SharedLock(self);
   Locks::heap_bitmap_lock_->ExclusiveLock(self);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  timings_.StartSplit("SweepArray");
+  GetTimings()->StartSplit("SweepArray");
   Thread* self = Thread::Current();
   mirror::Object** chunk_free_buffer = reinterpret_cast<mirror::Object**>(
       sweep_array_free_buffer_mem_map_->BaseBegin());
   size_t chunk_free_pos = 0;
-  size_t freed_bytes = 0;
-  size_t freed_large_object_bytes = 0;
-  size_t freed_objects = 0;
-  size_t freed_large_objects = 0;
+  ObjectBytePair freed;
+  ObjectBytePair freed_los;
   // How many objects are left in the array, modified after each space is swept.
   Object** objects = allocations->Begin();
   size_t count = allocations->Size();
@@ -1080,10 +1072,10 @@
         // if needed.
         if (!mark_bitmap->Test(obj)) {
           if (chunk_free_pos >= kSweepArrayChunkFreeSize) {
-            timings_.StartSplit("FreeList");
-            freed_objects += chunk_free_pos;
-            freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
-            timings_.EndSplit();
+            GetTimings()->StartSplit("FreeList");
+            freed.objects += chunk_free_pos;
+            freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+            GetTimings()->EndSplit();
             chunk_free_pos = 0;
           }
           chunk_free_buffer[chunk_free_pos++] = obj;
@@ -1093,10 +1085,10 @@
       }
     }
     if (chunk_free_pos > 0) {
-      timings_.StartSplit("FreeList");
-      freed_objects += chunk_free_pos;
-      freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
-      timings_.EndSplit();
+      GetTimings()->StartSplit("FreeList");
+      freed.objects += chunk_free_pos;
+      freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+      GetTimings()->EndSplit();
       chunk_free_pos = 0;
     }
     // All of the references which space contained are no longer in the allocation stack, update
@@ -1117,23 +1109,16 @@
       continue;
     }
     if (!large_mark_objects->Test(obj)) {
-      ++freed_large_objects;
-      freed_large_object_bytes += large_object_space->Free(self, obj);
+      ++freed_los.objects;
+      freed_los.bytes += large_object_space->Free(self, obj);
     }
   }
-  timings_.EndSplit();
-
-  timings_.StartSplit("RecordFree");
-  VLOG(heap) << "Freed " << freed_objects << "/" << count << " objects with size "
-             << PrettySize(freed_bytes);
-  RecordFree(freed_objects, freed_bytes);
-  RecordFreeLargeObjects(freed_large_objects, freed_large_object_bytes);
-  timings_.EndSplit();
-
-  timings_.StartSplit("ResetStack");
+  GetTimings()->NewSplit("RecordFree");
+  RecordFree(freed);
+  RecordFreeLOS(freed_los);
+  GetTimings()->NewSplit("ResetStack");
   allocations->Reset();
-  timings_.EndSplit();
-
+  GetTimings()->EndSplit();
   sweep_array_free_buffer_mem_map_->MadviseDontNeedAndZero();
 }
 
@@ -1142,33 +1127,27 @@
   CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
   // Mark everything allocated since the last as GC live so that we can sweep concurrently,
   // knowing that new allocations won't be marked as live.
-  timings_.StartSplit("MarkStackAsLive");
+  GetTimings()->StartSplit("MarkStackAsLive");
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
   heap_->MarkAllocStackAsLive(live_stack);
   live_stack->Reset();
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 
   DCHECK(mark_stack_->IsEmpty());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       TimingLogger::ScopedSplit split(
-          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", &timings_);
-      size_t freed_objects = 0;
-      size_t freed_bytes = 0;
-      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-      RecordFree(freed_objects, freed_bytes);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", GetTimings());
+      RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
   }
   SweepLargeObjects(swap_bitmaps);
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_);
-  size_t freed_objects = 0;
-  size_t freed_bytes = 0;
-  heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-  RecordFreeLargeObjects(freed_objects, freed_bytes);
+  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
@@ -1236,7 +1215,7 @@
 
 // Scan anything that's on the mark stack.
 void MarkSweep::ProcessMarkStack(bool paused) {
-  timings_.StartSplit(paused ? "(Paused)ProcessMarkStack" : "ProcessMarkStack");
+  GetTimings()->StartSplit(paused ? "(Paused)ProcessMarkStack" : "ProcessMarkStack");
   size_t thread_count = GetThreadCount(paused);
   if (kParallelProcessMarkStack && thread_count > 1 &&
       mark_stack_->Size() >= kMinimumParallelMarkStackSize) {
@@ -1269,12 +1248,10 @@
       ScanObject(obj);
     }
   }
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
-inline bool MarkSweep::IsMarked(const Object* object) const
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-  DCHECK(object != nullptr);
+inline bool MarkSweep::IsMarked(const Object* object) const {
   if (immune_region_.ContainsObject(object)) {
     return true;
   }
@@ -1285,7 +1262,7 @@
 }
 
 void MarkSweep::FinishPhase() {
-  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed()
         << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed();
@@ -1322,9 +1299,9 @@
     // not be in use.
     GetHeap()->AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
   } else {
-    timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+    GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
     GetHeap()->RevokeAllThreadLocalBuffers();
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
   }
 }
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index a44d8a1..2780099 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -215,7 +215,8 @@
 
  protected:
   // Returns true if the object has its bit set in the mark bitmap.
-  bool IsMarked(const mirror::Object* object) const;
+  bool IsMarked(const mirror::Object* object) const
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index badf8b3..8a3ac9d 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -41,22 +41,12 @@
 #include "jni_internal.h"
 #include "mark_sweep-inl.h"
 #include "monitor.h"
-#include "mirror/art_field.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/class_loader.h"
-#include "mirror/dex_cache.h"
 #include "mirror/reference-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array.h"
-#include "mirror/object_array-inl.h"
 #include "runtime.h"
-#include "stack.h"
 #include "thread-inl.h"
 #include "thread_list.h"
-#include "verifier/method_verifier.h"
 
-using ::art::mirror::Class;
 using ::art::mirror::Object;
 
 namespace art {
@@ -69,7 +59,7 @@
 static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
 
 void SemiSpace::BindBitmaps() {
-  timings_.StartSplit("BindBitmaps");
+  GetTimings()->StartSplit("BindBitmaps");
   WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -93,7 +83,7 @@
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
   }
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 SemiSpace::SemiSpace(Heap* heap, bool generational, const std::string& name_prefix)
@@ -141,7 +131,7 @@
 }
 
 void SemiSpace::InitializePhase() {
-  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
@@ -161,11 +151,11 @@
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback,
-      &MarkObjectCallback, &ProcessMarkStackCallback, this);
+      false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
+      &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
 void SemiSpace::MarkingPhase() {
@@ -186,8 +176,9 @@
   // to prevent fragmentation.
   RevokeAllThreadLocalBuffers();
   if (generational_) {
-    if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
-        clear_soft_references_) {
+    if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
+        GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAlloc ||
+        GetCurrentIteration()->GetClearSoftReferences()) {
       // If an explicit, native allocation-triggered, or last attempt
       // collection, collect the whole heap.
       whole_heap_collection_ = true;
@@ -201,21 +192,15 @@
     }
   }
 
-  if (!clear_soft_references_) {
-    if (!generational_) {
-      // If non-generational, always clear soft references.
-      clear_soft_references_ = true;
-    } else {
-      // If generational, clear soft references if a whole heap collection.
-      if (whole_heap_collection_) {
-        clear_soft_references_ = true;
-      }
-    }
+  if (!generational_ || whole_heap_collection_) {
+    // If non-generational, always clear soft references.
+    // If generational, clear soft references if a whole heap collection.
+    GetCurrentIteration()->SetClearSoftReferences(true);
   }
 
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
 
-  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
   if (generational_) {
     // If last_gc_to_space_end_ is out of the bounds of the from-space
     // (the to-space from last GC), then point it to the beginning of
@@ -230,14 +215,14 @@
   // Assume the cleared space is already empty.
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
-  heap_->ProcessCards(timings_, kUseRememberedSet && generational_);
+  heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_);
   // Clear the whole card table since we can not Get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
-  timings_.NewSplit("ClearCardTable");
+  GetTimings()->NewSplit("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
-  timings_.NewSplit("SwapStacks");
+  GetTimings()->NewSplit("SwapStacks");
   if (kUseThreadLocalAllocationStack) {
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
   }
@@ -255,7 +240,7 @@
     ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
-  timings_.NewSplit("RecordFree");
+  GetTimings()->NewSplit("RecordFree");
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
@@ -267,14 +252,14 @@
   CHECK_LE(to_objects, from_objects);
   // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
   // space.
-  RecordFree(from_objects - to_objects, from_bytes - to_bytes);
+  RecordFree(ObjectBytePair(from_objects - to_objects, from_bytes - to_bytes));
   // Clear and protect the from space.
   from_space_->Clear();
   VLOG(heap) << "Protecting from_space_: " << *from_space_;
   from_space_->GetMemMap()->Protect(kProtectFromSpace ? PROT_NONE : PROT_READ);
-  timings_.StartSplit("PreSweepingGcVerification");
+  GetTimings()->StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
   if (swap_semi_spaces_) {
     heap_->SwapSemiSpaces();
   }
@@ -290,7 +275,7 @@
         TimingLogger::ScopedSplit split(
             space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                      "UpdateAndMarkImageModUnionTable",
-                                     &timings_);
+                                     GetTimings());
         table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
       } else if (heap_->FindRememberedSetFromSpace(space) != nullptr) {
         DCHECK(kUseRememberedSet);
@@ -369,12 +354,12 @@
 };
 
 void SemiSpace::MarkReachableObjects() {
-  timings_.StartSplit("MarkStackAsLive");
+  GetTimings()->StartSplit("MarkStackAsLive");
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
   heap_->MarkAllocStackAsLive(live_stack);
   live_stack->Reset();
 
-  timings_.NewSplit("UpdateAndMarkRememberedSets");
+  GetTimings()->NewSplit("UpdateAndMarkRememberedSets");
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune and has no mod union table (the
     // non-moving space when the bump pointer space only collection is
@@ -413,7 +398,7 @@
   }
 
   if (is_large_object_space_immune_) {
-    timings_.NewSplit("VisitLargeObjects");
+    GetTimings()->NewSplit("VisitLargeObjects");
     DCHECK(generational_ && !whole_heap_collection_);
     // Delay copying the live set to the marked set until here from
     // BindBitmaps() as the large objects on the allocation stack may
@@ -431,13 +416,13 @@
                                         reinterpret_cast<uintptr_t>(large_object_space->End()),
                                         visitor);
   }
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
   // Recursively process the mark stack.
   ProcessMarkStack();
 }
 
 void SemiSpace::ReclaimPhase() {
-  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
@@ -445,11 +430,11 @@
     // Swap the live and mark bitmaps for each space which we modified space. This is an
     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
     // bitmaps.
-    timings_.StartSplit("SwapBitmaps");
+    GetTimings()->StartSplit("SwapBitmaps");
     SwapBitmaps();
-    timings_.EndSplit();
+    GetTimings()->EndSplit();
     // Unbind the live and mark bitmaps.
-    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    TimingLogger::ScopedSplit split("UnBindBitmaps", GetTimings());
     GetHeap()->UnBindBitmaps();
   }
   if (saved_bytes_ > 0) {
@@ -644,7 +629,7 @@
 
 // Marks all objects in the root set.
 void SemiSpace::MarkRoots() {
-  timings_.NewSplit("MarkRoots");
+  GetTimings()->NewSplit("MarkRoots");
   // TODO: Visit up image roots as well?
   Runtime::Current()->VisitRoots(MarkRootCallback, this);
 }
@@ -670,9 +655,9 @@
 }
 
 void SemiSpace::SweepSystemWeaks() {
-  timings_.StartSplit("SweepSystemWeaks");
+  GetTimings()->StartSplit("SweepSystemWeaks");
   Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
@@ -681,7 +666,7 @@
 
 void SemiSpace::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
-  TimingLogger::ScopedSplit split("Sweep", &timings_);
+  TimingLogger::ScopedSplit split("Sweep", GetTimings());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
@@ -689,11 +674,8 @@
         continue;
       }
       TimingLogger::ScopedSplit split(
-          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
-      size_t freed_objects = 0;
-      size_t freed_bytes = 0;
-      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-      RecordFree(freed_objects, freed_bytes);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
+      RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
   }
   if (!is_large_object_space_immune_) {
@@ -703,11 +685,8 @@
 
 void SemiSpace::SweepLargeObjects(bool swap_bitmaps) {
   DCHECK(!is_large_object_space_immune_);
-  TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_);
-  size_t freed_objects = 0;
-  size_t freed_bytes = 0;
-  heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-  RecordFreeLargeObjects(freed_objects, freed_bytes);
+  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
@@ -760,7 +739,7 @@
     DCHECK(mark_bitmap != nullptr);
     DCHECK_EQ(live_bitmap, mark_bitmap);
   }
-  timings_.StartSplit("ProcessMarkStack");
+  GetTimings()->StartSplit("ProcessMarkStack");
   while (!mark_stack_->IsEmpty()) {
     Object* obj = mark_stack_->PopBack();
     if (generational_ && !whole_heap_collection_ && promo_dest_space->HasAddress(obj)) {
@@ -771,7 +750,7 @@
     }
     ScanObject(obj);
   }
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
@@ -788,7 +767,7 @@
     // Already forwarded, must be marked.
     return obj;
   }
-  return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
+  return mark_bitmap_->Test(obj) ? obj : nullptr;
 }
 
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
@@ -802,7 +781,7 @@
 }
 
 void SemiSpace::FinishPhase() {
-  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
@@ -843,9 +822,9 @@
 }
 
 void SemiSpace::RevokeAllThreadLocalBuffers() {
-  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
   GetHeap()->RevokeAllThreadLocalBuffers();
-  timings_.EndSplit();
+  GetTimings()->EndSplit();
 }
 
 }  // namespace collector
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index c0a6b6a..530a3c9 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,6 +34,8 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
+  // Mark compact collector.
+  kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
   // A (mostly) concurrent copying collector.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e6a5380..6c63e5f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -36,6 +36,7 @@
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/concurrent_copying.h"
+#include "gc/collector/mark_compact.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
 #include "gc/collector/semi_space.h"
@@ -331,9 +332,10 @@
     semi_space_collector_ = new collector::SemiSpace(this, generational,
                                                      generational ? "generational" : "");
     garbage_collectors_.push_back(semi_space_collector_);
-
     concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
     garbage_collectors_.push_back(concurrent_copying_collector_);
+    mark_compact_collector_ = new collector::MarkCompact(this);
+    garbage_collectors_.push_back(mark_compact_collector_);
   }
 
   if (GetImageSpace() != nullptr && main_space_ != nullptr) {
@@ -1341,8 +1343,9 @@
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
   uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
-  ThreadList* tl = Runtime::Current()->GetThreadList();
-  Thread* self = Thread::Current();
+  Runtime* const runtime = Runtime::Current();
+  ThreadList* const tl = runtime->GetThreadList();
+  Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
   const bool copying_transition =
@@ -1371,7 +1374,7 @@
     }
     usleep(1000);
   }
-  if (Runtime::Current()->IsShuttingDown(self)) {
+  if (runtime->IsShuttingDown(self)) {
     // Don't allow heap transitions to happen if the runtime is shutting down since these can
     // cause objects to get finalized.
     FinishGC(self, collector::kGcTypeNone);
@@ -1432,10 +1435,15 @@
 void Heap::ChangeCollector(CollectorType collector_type) {
   // TODO: Only do this with all mutators suspended to avoid races.
   if (collector_type != collector_type_) {
+    if (collector_type == kCollectorTypeMC) {
+      // Don't allow mark compact unless support is compiled in.
+      CHECK(kMarkCompactSupport);
+    }
     collector_type_ = collector_type;
     gc_plan_.clear();
     switch (collector_type_) {
       case kCollectorTypeCC:  // Fall-through.
+      case kCollectorTypeMC:  // Fall-through.
       case kCollectorTypeSS:  // Fall-through.
       case kCollectorTypeGSS: {
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1635,8 +1643,8 @@
     if (temp_space_ != nullptr) {
       CHECK(temp_space_->IsEmpty());
     }
-    total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
-    total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
+    total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects();
+    total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes();
     // Update the end and write out image.
     non_moving_space_->SetEnd(target_space.End());
     non_moving_space_->SetLimit(target_space.Limit());
@@ -1722,13 +1730,17 @@
 void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
                    space::ContinuousMemMapAllocSpace* source_space) {
   CHECK(kMovingCollector);
-  CHECK_NE(target_space, source_space) << "In-place compaction currently unsupported";
   if (target_space != source_space) {
     // Don't swap spaces since this isn't a typical semi space collection.
     semi_space_collector_->SetSwapSemiSpaces(false);
     semi_space_collector_->SetFromSpace(source_space);
     semi_space_collector_->SetToSpace(target_space);
     semi_space_collector_->Run(kGcCauseCollectorTransition, false);
+  } else {
+    CHECK(target_space->IsBumpPointerSpace())
+        << "In-place compaction is only supported for bump pointer spaces";
+    mark_compact_collector_->SetSpace(target_space->AsBumpPointerSpace());
+    mark_compact_collector_->Run(kGcCauseCollectorTransition, false);
   }
 }
 
@@ -1792,21 +1804,30 @@
   if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
            current_allocator_ == kAllocatorTypeTLAB);
-    if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
-      gc_type = semi_space_collector_->GetGcType();
-      semi_space_collector_->SetFromSpace(bump_pointer_space_);
-      semi_space_collector_->SetToSpace(temp_space_);
-      collector = semi_space_collector_;
-      semi_space_collector_->SetSwapSemiSpaces(true);
-    } else if (collector_type_ == kCollectorTypeCC) {
-      gc_type = concurrent_copying_collector_->GetGcType();
-      collector = concurrent_copying_collector_;
-    } else {
-      LOG(FATAL) << "Unreachable - invalid collector type " << static_cast<size_t>(collector_type_);
+    switch (collector_type_) {
+      case kCollectorTypeSS:
+        // Fall-through.
+      case kCollectorTypeGSS:
+        semi_space_collector_->SetFromSpace(bump_pointer_space_);
+        semi_space_collector_->SetToSpace(temp_space_);
+        semi_space_collector_->SetSwapSemiSpaces(true);
+        collector = semi_space_collector_;
+        break;
+      case kCollectorTypeCC:
+        collector = concurrent_copying_collector_;
+        break;
+      case kCollectorTypeMC:
+        mark_compact_collector_->SetSpace(bump_pointer_space_);
+        collector = mark_compact_collector_;
+        break;
+      default:
+        LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_);
     }
-    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    CHECK(temp_space_->IsEmpty());
-    gc_type = collector::kGcTypeFull;
+    if (collector != mark_compact_collector_) {
+      temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      CHECK(temp_space_->IsEmpty());
+    }
+    gc_type = collector::kGcTypeFull;  // TODO: Not hard code this in.
   } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
       current_allocator_ == kAllocatorTypeDlMalloc) {
     collector = FindCollectorByGcType(gc_type);
@@ -1817,15 +1838,15 @@
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
-  total_objects_freed_ever_ += collector->GetFreedObjects();
-  total_bytes_freed_ever_ += collector->GetFreedBytes();
+  total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects();
+  total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes();
   RequestHeapTrim();
   // Enqueue cleared references.
   reference_processor_.EnqueueClearedReferences(self);
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(collector);
-  const size_t duration = collector->GetDurationNs();
-  const std::vector<uint64_t>& pause_times = collector->GetPauseTimes();
+  const size_t duration = GetCurrentGcIteration()->GetDurationNs();
+  const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes();
   // Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC
   // (mutator time blocked >=  long_pause_log_threshold_).
   bool log_gc = gc_cause == kGcCauseExplicit;
@@ -1847,14 +1868,14 @@
                      << ((i != pause_times.size() - 1) ? "," : "");
     }
     LOG(INFO) << gc_cause << " " << collector->GetName()
-              << " GC freed "  << collector->GetFreedObjects() << "("
-              << PrettySize(collector->GetFreedBytes()) << ") AllocSpace objects, "
-              << collector->GetFreedLargeObjects() << "("
-              << PrettySize(collector->GetFreedLargeObjectBytes()) << ") LOS objects, "
+              << " GC freed "  << current_gc_iteration_.GetFreedObjects() << "("
+              << PrettySize(current_gc_iteration_.GetFreedBytes()) << ") AllocSpace objects, "
+              << current_gc_iteration_.GetFreedLargeObjects() << "("
+              << PrettySize(current_gc_iteration_.GetFreedLargeObjectBytes()) << ") LOS objects, "
               << percent_free << "% free, " << PrettySize(current_heap_size) << "/"
               << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
               << " total " << PrettyDuration((duration / 1000) * 1000);
-    VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings());
+    VLOG(heap) << ConstDumpable<TimingLogger>(*current_gc_iteration_.GetTimings());
   }
   FinishGC(self, gc_type);
   // Inform DDMS that a GC completed.
@@ -2292,7 +2313,7 @@
   return it->second;
 }
 
-void Heap::ProcessCards(TimingLogger& timings, bool use_rem_sets) {
+void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets) {
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
     accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
@@ -2300,15 +2321,15 @@
     if (table != nullptr) {
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
-      TimingLogger::ScopedSplit split(name, &timings);
+      TimingLogger::ScopedSplit split(name, timings);
       table->ClearCards();
     } else if (use_rem_sets && rem_set != nullptr) {
       DCHECK(collector::SemiSpace::kUseRememberedSet && collector_type_ == kCollectorTypeGSS)
           << static_cast<int>(collector_type_);
-      TimingLogger::ScopedSplit split("AllocSpaceRemSetClearCards", &timings);
+      TimingLogger::ScopedSplit split("AllocSpaceRemSetClearCards", timings);
       rem_set->ClearCards();
     } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
-      TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
+      TimingLogger::ScopedSplit split("AllocSpaceClearCards", timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
       // were dirty before the GC started.
       // TODO: Need to use atomic for the case where aged(cleaning thread) -> dirty(other thread)
@@ -2316,7 +2337,8 @@
       // The races are we either end up with: Aged card, unaged card. Since we have the checkpoint
       // roots and then we scan / update mod union tables after. We will always scan either card.
       // If we end up with the non aged card, we scan it it in the pause.
-      card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor());
+      card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(),
+                                     VoidFunctor());
     }
   }
 }
@@ -2326,7 +2348,7 @@
 
 void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) {
   Thread* const self = Thread::Current();
-  TimingLogger* const timings = &gc->GetTimings();
+  TimingLogger* const timings = current_gc_iteration_.GetTimings();
   if (verify_pre_gc_heap_) {
     TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -2368,13 +2390,13 @@
 void Heap::PrePauseRosAllocVerification(collector::GarbageCollector* gc) {
   // TODO: Add a new runtime option for this?
   if (verify_pre_gc_rosalloc_) {
-    RosAllocVerification(&gc->GetTimings(), "PreGcRosAllocVerification");
+    RosAllocVerification(current_gc_iteration_.GetTimings(), "PreGcRosAllocVerification");
   }
 }
 
 void Heap::PreSweepingGcVerification(collector::GarbageCollector* gc) {
   Thread* const self = Thread::Current();
-  TimingLogger* const timings = &gc->GetTimings();
+  TimingLogger* const timings = current_gc_iteration_.GetTimings();
   // Called before sweeping occurs since we want to make sure we are not going so reclaim any
   // reachable objects.
   if (verify_pre_sweeping_heap_) {
@@ -2400,7 +2422,7 @@
 void Heap::PostGcVerificationPaused(collector::GarbageCollector* gc) {
   // Only pause if we have to do some verification.
   Thread* const self = Thread::Current();
-  TimingLogger* const timings = &gc->GetTimings();
+  TimingLogger* const timings = GetCurrentGcIteration()->GetTimings();
   if (verify_system_weaks_) {
     ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
     collector::MarkSweep* mark_sweep = down_cast<collector::MarkSweep*>(gc);
@@ -2554,9 +2576,9 @@
     // We also check that the bytes allocated aren't over the footprint limit in order to prevent a
     // pathological case where dead objects which aren't reclaimed by sticky could get accumulated
     // if the sticky GC throughput always remained >= the full/partial throughput.
-    if (collector_ran->GetEstimatedLastIterationThroughput() * kStickyGcThroughputAdjustment >=
+    if (current_gc_iteration_.GetEstimatedThroughput() * kStickyGcThroughputAdjustment >=
         non_sticky_collector->GetEstimatedMeanThroughput() &&
-        non_sticky_collector->GetIterations() > 0 &&
+        non_sticky_collector->NumberOfIterations() > 0 &&
         bytes_allocated <= max_allowed_footprint_) {
       next_gc_type_ = collector::kGcTypeSticky;
     } else {
@@ -2574,7 +2596,7 @@
     if (IsGcConcurrent()) {
       // Calculate when to perform the next ConcurrentGC.
       // Calculate the estimated GC duration.
-      const double gc_duration_seconds = NsToMs(collector_ran->GetDurationNs()) / 1000.0;
+      const double gc_duration_seconds = NsToMs(current_gc_iteration_.GetDurationNs()) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
       size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
       remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 9b49373..a34cd38 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -27,6 +27,7 @@
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
 #include "gc/gc_cause.h"
+#include "gc/collector/garbage_collector.h"
 #include "gc/collector/gc_type.h"
 #include "gc/collector_type.h"
 #include "globals.h"
@@ -66,6 +67,7 @@
 namespace collector {
   class ConcurrentCopying;
   class GarbageCollector;
+  class MarkCompact;
   class MarkSweep;
   class SemiSpace;
 }  // namespace collector
@@ -316,6 +318,13 @@
     return discontinuous_spaces_;
   }
 
+  const collector::Iteration* GetCurrentGcIteration() const {
+    return &current_gc_iteration_;
+  }
+  collector::Iteration* GetCurrentGcIteration() {
+    return &current_gc_iteration_;
+  }
+
   // Enable verification of object references when the runtime is sufficiently initialized.
   void EnableObjectValidation() {
     verify_object_mode_ = kVerifyObjectSupport;
@@ -573,7 +582,7 @@
   }
   static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
-        collector_type == kCollectorTypeCC;
+        collector_type == kCollectorTypeCC || collector_type == kCollectorTypeMC;
   }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -689,7 +698,7 @@
   void SwapStacks(Thread* self);
 
   // Clear cards and update the mod union table.
-  void ProcessCards(TimingLogger& timings, bool use_rem_sets);
+  void ProcessCards(TimingLogger* timings, bool use_rem_sets);
 
   // Signal the heap trim daemon that there is something to do, either a heap transition or heap
   // trim.
@@ -848,6 +857,9 @@
   // Data structure GC overhead.
   Atomic<size_t> gc_memory_overhead_;
 
+  // Info related to the current or previous GC iteration.
+  collector::Iteration current_gc_iteration_;
+
   // Heap verification flags.
   const bool verify_missing_card_marks_;
   const bool verify_system_weaks_;
@@ -952,12 +964,14 @@
 
   std::vector<collector::GarbageCollector*> garbage_collectors_;
   collector::SemiSpace* semi_space_collector_;
+  collector::MarkCompact* mark_compact_collector_;
   collector::ConcurrentCopying* concurrent_copying_collector_;
 
   const bool running_on_valgrind_;
   const bool use_tlab_;
 
   friend class collector::GarbageCollector;
+  friend class collector::MarkCompact;
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
   friend class ReferenceQueue;
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 3ff9889..292781e 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -205,6 +205,10 @@
   }
 }
 
+void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) {
+  cleared_references_.UpdateRoots(callback, arg);
+}
+
 void ReferenceProcessor::EnqueueClearedReferences(Thread* self) {
   Locks::mutator_lock_->AssertNotHeld(self);
   if (!cleared_references_.IsEmpty()) {
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index ff7da52..2771ea8 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -59,6 +59,8 @@
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
                               IsHeapReferenceMarkedCallback* is_marked_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
  private:
   class ProcessReferencesArgs {
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 19476e6..c3931e8 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -163,5 +163,11 @@
   } while (LIKELY(ref != head));
 }
 
+void ReferenceQueue::UpdateRoots(IsMarkedCallback* callback, void* arg) {
+  if (list_ != nullptr) {
+    list_ = down_cast<mirror::Reference*>(callback(list_, arg));
+  }
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 8ef0d20..cd814bb 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -83,12 +83,16 @@
   mirror::Reference* GetList() {
     return list_;
   }
+  // Visits list_, currently only used for the mark compact GC.
+  void UpdateRoots(IsMarkedCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
   // calling AtomicEnqueueIfNotEnqueued.
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  // The actual reference list. Not a root since it will be nullptr when the GC is not running.
+  // The actual reference list. Only a root for the mark compact GC since it will be null for other
+  // GC types.
   mirror::Reference* list_;
 };
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 9e61f30..feee34f 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -145,6 +145,12 @@
 
   accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
 
+  // Record objects / bytes freed.
+  void RecordFree(int32_t objects, int32_t bytes) {
+    objects_allocated_.FetchAndSubSequentiallyConsistent(objects);
+    bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes);
+  }
+
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 54a63f0..abae8ff 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -411,28 +411,24 @@
       bitmap->Clear(ptrs[i]);
     }
   }
-  context->freed_objects += num_ptrs;
-  context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
+  context->freed.objects += num_ptrs;
+  context->freed.bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
-void LargeObjectSpace::Sweep(bool swap_bitmaps, size_t* out_freed_objects,
-                             size_t* out_freed_bytes) {
+collector::ObjectBytePair LargeObjectSpace::Sweep(bool swap_bitmaps) {
   if (Begin() >= End()) {
-    return;
+    return collector::ObjectBytePair(0, 0);
   }
   accounting::LargeObjectBitmap* live_bitmap = GetLiveBitmap();
   accounting::LargeObjectBitmap* mark_bitmap = GetMarkBitmap();
   if (swap_bitmaps) {
     std::swap(live_bitmap, mark_bitmap);
   }
-  DCHECK(out_freed_objects != nullptr);
-  DCHECK(out_freed_bytes != nullptr);
-  SweepCallbackContext scc(swap_bitmaps, this);
+  AllocSpace::SweepCallbackContext scc(swap_bitmaps, this);
   accounting::LargeObjectBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
                                            reinterpret_cast<uintptr_t>(Begin()),
                                            reinterpret_cast<uintptr_t>(End()), SweepCallback, &scc);
-  *out_freed_objects += scc.freed_objects;
-  *out_freed_bytes += scc.freed_bytes;
+  return scc.freed;
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index a84b43a..01982d0 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -73,7 +73,7 @@
     return this;
   }
 
-  void Sweep(bool swap_bitmaps, size_t* out_freed_objects, size_t* out_freed_bytes);
+  collector::ObjectBytePair Sweep(bool swap_bitmaps);
 
   virtual bool CanMoveObjects() const OVERRIDE {
     return false;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 57ed0bd..4d74f3c 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -242,8 +242,8 @@
   // Use a bulk free, that merges consecutive objects before freeing or free per object?
   // Documentation suggests better free performance with merging, but this may be at the expensive
   // of allocation.
-  context->freed_objects += num_ptrs;
-  context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
+  context->freed.objects += num_ptrs;
+  context->freed.bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 4e28416..bff28f6 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -81,14 +81,12 @@
   CHECK(mark_bitmap_.get() != nullptr);
 }
 
-void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
-  DCHECK(freed_objects != nullptr);
-  DCHECK(freed_bytes != nullptr);
+collector::ObjectBytePair ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps) {
   accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap();
   accounting::ContinuousSpaceBitmap* mark_bitmap = GetMarkBitmap();
   // If the bitmaps are bound then sweeping this space clearly won't do anything.
   if (live_bitmap == mark_bitmap) {
-    return;
+    return collector::ObjectBytePair(0, 0);
   }
   SweepCallbackContext scc(swap_bitmaps, this);
   if (swap_bitmaps) {
@@ -98,8 +96,7 @@
   accounting::ContinuousSpaceBitmap::SweepWalk(
       *live_bitmap, *mark_bitmap, reinterpret_cast<uintptr_t>(Begin()),
       reinterpret_cast<uintptr_t>(End()), GetSweepCallback(), reinterpret_cast<void*>(&scc));
-  *freed_objects += scc.freed_objects;
-  *freed_bytes += scc.freed_bytes;
+  return scc.freed;
 }
 
 // Returns the old mark bitmap.
@@ -136,9 +133,8 @@
   mark_bitmap_->SetName(temp_name);
 }
 
-Space::SweepCallbackContext::SweepCallbackContext(bool swap_bitmaps, space::Space* space)
-    : swap_bitmaps(swap_bitmaps), space(space), self(Thread::Current()), freed_objects(0),
-      freed_bytes(0) {
+AllocSpace::SweepCallbackContext::SweepCallbackContext(bool swap_bitmaps, space::Space* space)
+    : swap_bitmaps(swap_bitmaps), space(space), self(Thread::Current()) {
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 8415fa1..8444a70 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
+#include "gc/collector/garbage_collector.h"
 #include "globals.h"
 #include "image.h"
 #include "mem_map.h"
@@ -172,16 +173,6 @@
   std::string name_;
 
  protected:
-  struct SweepCallbackContext {
-   public:
-    SweepCallbackContext(bool swap_bitmaps, space::Space* space);
-    const bool swap_bitmaps;
-    space::Space* const space;
-    Thread* const self;
-    size_t freed_objects;
-    size_t freed_bytes;
-  };
-
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
@@ -232,6 +223,14 @@
   virtual void RevokeAllThreadLocalBuffers() = 0;
 
  protected:
+  struct SweepCallbackContext {  // State handed to the sweep callback during one Sweep() call.
+    SweepCallbackContext(bool swap_bitmaps, space::Space* space);
+    const bool swap_bitmaps;  // Copied from the swap_bitmaps argument given to Sweep().
+    space::Space* const space;  // Space supplied at construction (Sweep() passes `this`).
+    Thread* const self;  // Initialized to Thread::Current() by the constructor.
+    collector::ObjectBytePair freed;  // Running objects/bytes freed; Sweep() returns this.
+  };
+
   AllocSpace() {}
   virtual ~AllocSpace() {}
 
@@ -415,7 +414,7 @@
     return mark_bitmap_.get();
   }
 
-  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  collector::ObjectBytePair Sweep(bool swap_bitmaps);
   virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
diff --git a/runtime/globals.h b/runtime/globals.h
index 58c2118..3a906f1 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -74,8 +74,11 @@
 
 // Garbage collector constants.
 static constexpr bool kMovingCollector = true && !kUsePortableCompiler;
+static constexpr bool kMarkCompactSupport = false && kMovingCollector;
+// True if we allow moving field arrays; this can cause complications with mark compact.
+static constexpr bool kMoveFieldArrays = !kMarkCompactSupport;
 // True if we allow moving classes.
-static constexpr bool kMovingClasses = true;
+static constexpr bool kMovingClasses = !kMarkCompactSupport;
 // True if we allow moving fields.
 static constexpr bool kMovingFields = false;
 // True if we allow moving methods.
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index b787233..f561643 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -59,8 +59,7 @@
 
 // Make sure that the entry at "idx" is correctly paired with "iref".
 inline bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
-  const mirror::Object* obj = table_[idx];
-  IndirectRef checkRef = ToIndirectRef(obj, idx);
+  IndirectRef checkRef = ToIndirectRef(idx);
   if (UNLIKELY(checkRef != iref)) {
     LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what
                << " stale " << kind_ << " " << iref
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 98e1d21..ad798ed 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -137,13 +137,13 @@
       DCHECK_GE(pScan, table_ + prevState.parts.topIndex);
     }
     UpdateSlotAdd(obj, pScan - table_);
-    result = ToIndirectRef(obj, pScan - table_);
+    result = ToIndirectRef(pScan - table_);
     *pScan = obj;
     segment_state_.parts.numHoles--;
   } else {
     // Add to the end.
     UpdateSlotAdd(obj, topIndex);
-    result = ToIndirectRef(obj, topIndex);
+    result = ToIndirectRef(topIndex);
     table_[topIndex++] = obj;
     segment_state_.parts.topIndex = topIndex;
   }
@@ -277,9 +277,6 @@
       // while the read barrier won't.
       entries.push_back(obj);
     } else {
-      // We need a read barrier if weak globals. Since this is for
-      // debugging where performance isn't top priority, we
-      // unconditionally enable the read barrier, which is conservative.
       obj = ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(root);
       entries.push_back(obj);
     }
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 5b3ed68..b3a855d 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -27,6 +27,7 @@
 #include "mem_map.h"
 #include "object_callbacks.h"
 #include "offsets.h"
+#include "read_barrier.h"
 
 namespace art {
 namespace mirror {
@@ -215,6 +216,7 @@
   }
 
   mirror::Object** operator*() {
+    // This does not have a read barrier as this is used to visit roots.
     return &table_[i_];
   }
 
@@ -298,6 +300,7 @@
     return segment_state_.parts.topIndex;
   }
 
+  // Note IrtIterator does not have a read barrier as it's used to visit roots.
   IrtIterator begin() {
     return IrtIterator(table_, 0, Capacity());
   }
@@ -333,7 +336,7 @@
    * The object pointer itself is subject to relocation in some GC
    * implementations, so we shouldn't really be using it here.
    */
-  IndirectRef ToIndirectRef(const mirror::Object* /*o*/, uint32_t tableIndex) const {
+  IndirectRef ToIndirectRef(uint32_t tableIndex) const {
     DCHECK_LT(tableIndex, 65536U);
     uint32_t serialChunk = slot_data_[tableIndex].serial;
     uintptr_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
@@ -368,9 +371,8 @@
   std::unique_ptr<MemMap> table_mem_map_;
   // Mem map where we store the extended debugging info.
   std::unique_ptr<MemMap> slot_mem_map_;
-  // bottom of the stack. If a JNI weak global table, do not directly
-  // access the object references in this as they are weak roots. Use
-  // Get() that has a read barrier.
+  // Bottom of the stack. Do not directly access the object references
+  // in this table, as they are roots. Use Get(), which has a read barrier.
   mirror::Object** table_;
   /* bit mask, ORed into all irefs */
   IndirectRefKind kind_;
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100..96eeb8d 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
+#include "globals.h"       // For KB.
 
 namespace art {
 
@@ -36,6 +37,20 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
 // Architecture-specific pointer sizes
 static constexpr size_t kArmPointerSize = 4;
 static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@
   }
 }
 
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64bit.
+
+// Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
+// But this one works rather well.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {  // Reserve per ISA.
+  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+           isa == kArm64 ? kArm64StackOverflowReservedBytes :
+           isa == kMips ? kMipsStackOverflowReservedBytes :
+           isa == kX86 ? kX86StackOverflowReservedBytes :
+           isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+           isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :  // Aborts.
+           (LOG(FATAL) << "Unknown instruction set: " << isa, 0);  // Aborts; names the bad value.
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 enum InstructionFeatures {
   kHwDiv  = 0x1,              // Supports hardware divide.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6dbc6a0..cb4d444 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -454,7 +454,7 @@
   }
   self->EndAssertNoThreadSuspension(old_cause);
   // Do this after populating the shadow frame in case EnsureInitialized causes a GC.
-  if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
+  if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitialized())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
@@ -527,7 +527,7 @@
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
     mirror::Class* declaring_class = method->GetDeclaringClass();
-    if (UNLIKELY(!declaring_class->IsInitializing())) {
+    if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
       HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
       if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index 1477324..325b089 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -294,14 +294,14 @@
                                      ObjectId threadId)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void CleanupMatchList(JdwpEvent** match_list,
-                        int match_count)
+                        size_t match_count)
       EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void EventFinish(ExpandBuf* pReq);
   void FindMatchingEvents(JdwpEventKind eventKind,
-                          ModBasket* basket,
+                          const ModBasket& basket,
                           JdwpEvent** match_list,
-                          int* pMatchCount)
+                          size_t* pMatchCount)
       EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UnregisterEvent(JdwpEvent* pEvent)
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index cb2c420..86c84e8 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -397,7 +397,7 @@
  * Run through the list and remove any entries with an expired "count" mod
  * from the event list, then free the match list.
  */
-void JdwpState::CleanupMatchList(JdwpEvent** match_list, int match_count) {
+void JdwpState::CleanupMatchList(JdwpEvent** match_list, size_t match_count) {
   JdwpEvent** ppEvent = match_list;
 
   while (match_count--) {
@@ -405,7 +405,8 @@
 
     for (int i = 0; i < pEvent->modCount; i++) {
       if (pEvent->mods[i].modKind == MK_COUNT && pEvent->mods[i].count.count == 0) {
-        VLOG(jdwp) << "##### Removing expired event";
+        VLOG(jdwp) << StringPrintf("##### Removing expired event (requestId=%#" PRIx32 ")",
+                                   pEvent->requestId);
         UnregisterEvent(pEvent);
         EventFree(pEvent);
         break;
@@ -445,7 +446,7 @@
  * If we find a Count mod before rejecting an event, we decrement it.  We
  * need to do this even if later mods cause us to ignore the event.
  */
-static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket)
+static bool ModsMatch(JdwpEvent* pEvent, const ModBasket& basket)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JdwpEventMod* pMod = pEvent->mods;
 
@@ -462,53 +463,53 @@
       CHECK(false);  // should not be getting these
       break;
     case MK_THREAD_ONLY:
-      if (pMod->threadOnly.threadId != basket->threadId) {
+      if (pMod->threadOnly.threadId != basket.threadId) {
         return false;
       }
       break;
     case MK_CLASS_ONLY:
-      if (!Dbg::MatchType(basket->classId, pMod->classOnly.refTypeId)) {
+      if (!Dbg::MatchType(basket.classId, pMod->classOnly.refTypeId)) {
         return false;
       }
       break;
     case MK_CLASS_MATCH:
-      if (!PatternMatch(pMod->classMatch.classPattern, basket->className)) {
+      if (!PatternMatch(pMod->classMatch.classPattern, basket.className)) {
         return false;
       }
       break;
     case MK_CLASS_EXCLUDE:
-      if (PatternMatch(pMod->classMatch.classPattern, basket->className)) {
+      if (PatternMatch(pMod->classMatch.classPattern, basket.className)) {
         return false;
       }
       break;
     case MK_LOCATION_ONLY:
-      if (pMod->locationOnly.loc != *basket->pLoc) {
+      if (pMod->locationOnly.loc != *basket.pLoc) {
         return false;
       }
       break;
     case MK_EXCEPTION_ONLY:
-      if (pMod->exceptionOnly.refTypeId != 0 && !Dbg::MatchType(basket->excepClassId, pMod->exceptionOnly.refTypeId)) {
+      if (pMod->exceptionOnly.refTypeId != 0 && !Dbg::MatchType(basket.excepClassId, pMod->exceptionOnly.refTypeId)) {
         return false;
       }
-      if ((basket->caught && !pMod->exceptionOnly.caught) || (!basket->caught && !pMod->exceptionOnly.uncaught)) {
+      if ((basket.caught && !pMod->exceptionOnly.caught) || (!basket.caught && !pMod->exceptionOnly.uncaught)) {
         return false;
       }
       break;
     case MK_FIELD_ONLY:
-      if (pMod->fieldOnly.fieldId != basket->fieldId) {
+      if (pMod->fieldOnly.fieldId != basket.fieldId) {
         return false;
       }
-      if (!Dbg::MatchType(basket->fieldTypeID, pMod->fieldOnly.refTypeId)) {
+      if (!Dbg::MatchType(basket.fieldTypeID, pMod->fieldOnly.refTypeId)) {
         return false;
       }
       break;
     case MK_STEP:
-      if (pMod->step.threadId != basket->threadId) {
+      if (pMod->step.threadId != basket.threadId) {
         return false;
       }
       break;
     case MK_INSTANCE_ONLY:
-      if (pMod->instanceOnly.objectId != basket->thisPtr) {
+      if (pMod->instanceOnly.objectId != basket.thisPtr) {
         return false;
       }
       break;
@@ -530,19 +531,16 @@
  * DO NOT call this multiple times for the same eventKind, as Count mods are
  * decremented during the scan.
  */
-void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, ModBasket* basket,
-                                   JdwpEvent** match_list, int* pMatchCount) {
+void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, const ModBasket& basket,
+                                   JdwpEvent** match_list, size_t* pMatchCount) {
   /* start after the existing entries */
   match_list += *pMatchCount;
 
-  JdwpEvent* pEvent = event_list_;
-  while (pEvent != NULL) {
+  for (JdwpEvent* pEvent = event_list_; pEvent != nullptr; pEvent = pEvent->next) {
     if (pEvent->eventKind == eventKind && ModsMatch(pEvent, basket)) {
       *match_list++ = pEvent;
       (*pMatchCount)++;
     }
-
-    pEvent = pEvent->next;
   }
 }
 
@@ -774,6 +772,22 @@
   return true;
 }
 
+// Logs every matched event (kind and request id), then the basket's thread id and name.
+static void LogMatchingEventsAndThread(JdwpEvent** match_list, size_t match_count,
+                                       const ModBasket& basket)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  for (size_t idx = 0; idx < match_count; ++idx) {
+    const JdwpEvent* const event = match_list[idx];
+    VLOG(jdwp) << "EVENT #" << idx << ": " << event->eventKind
+               << StringPrintf(" (requestId=%#" PRIx32 ")", event->requestId);
+  }
+  std::string thread_name;
+  if (Dbg::GetThreadName(basket.threadId, thread_name) != JDWP::ERR_NONE) {
+    thread_name = "<unknown>";  // Placeholder used when the name lookup reports an error.
+  }
+  VLOG(jdwp) << StringPrintf("  thread=%#" PRIx64, basket.threadId) << " " << thread_name;
+}
+
 /*
  * A location of interest has been reached.  This handles:
  *   Breakpoint
@@ -829,39 +843,40 @@
     return false;
   }
 
-  int match_count = 0;
+  size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
   {
     MutexLock mu(Thread::Current(), event_list_lock_);
     JdwpEvent** match_list = AllocMatchList(event_list_size_);
     if ((eventFlags & Dbg::kBreakpoint) != 0) {
-      FindMatchingEvents(EK_BREAKPOINT, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_BREAKPOINT, basket, match_list, &match_count);
     }
     if ((eventFlags & Dbg::kSingleStep) != 0) {
-      FindMatchingEvents(EK_SINGLE_STEP, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_SINGLE_STEP, basket, match_list, &match_count);
     }
     if ((eventFlags & Dbg::kMethodEntry) != 0) {
-      FindMatchingEvents(EK_METHOD_ENTRY, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_ENTRY, basket, match_list, &match_count);
     }
     if ((eventFlags & Dbg::kMethodExit) != 0) {
-      FindMatchingEvents(EK_METHOD_EXIT, &basket, match_list, &match_count);
-      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_EXIT, basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, basket, match_list, &match_count);
     }
     if (match_count != 0) {
-      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
-                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
-                                 basket.threadId, pLoc->dex_pc);
-
       suspend_policy = scanSuspendPolicy(match_list, match_count);
-      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      if (VLOG_IS_ON(jdwp)) {
+        LogMatchingEventsAndThread(match_list, match_count, basket);
+        VLOG(jdwp) << "  location=" << *pLoc;
+        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
+        VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+      }
 
       pReq = eventPrep();
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
-      for (int i = 0; i < match_count; i++) {
+      for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
@@ -892,6 +907,8 @@
   basket.fieldTypeID = typeId;
   basket.fieldId = fieldId;
 
+  DCHECK_EQ(fieldValue != nullptr, is_modification);
+
   if (InvokeInProgress()) {
     VLOG(jdwp) << "Not posting field event during invoke";
     return false;
@@ -912,7 +929,7 @@
     return false;
   }
 
-  int match_count = 0;
+  size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
   {
@@ -920,24 +937,29 @@
     JdwpEvent** match_list = AllocMatchList(event_list_size_);
 
     if (is_modification) {
-      FindMatchingEvents(EK_FIELD_MODIFICATION, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_FIELD_MODIFICATION, basket, match_list, &match_count);
     } else {
-      FindMatchingEvents(EK_FIELD_ACCESS, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_FIELD_ACCESS, basket, match_list, &match_count);
     }
     if (match_count != 0) {
-      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
-                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
-                                 basket.threadId, pLoc->dex_pc);
-
       suspend_policy = scanSuspendPolicy(match_list, match_count);
-      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      if (VLOG_IS_ON(jdwp)) {
+        LogMatchingEventsAndThread(match_list, match_count, basket);
+        VLOG(jdwp) << "  location=" << *pLoc;
+        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
+        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, basket.fieldTypeID) << " "
+                   << Dbg::GetClassName(basket.fieldTypeID);
+        VLOG(jdwp) << StringPrintf("  field=%#" PRIx32, basket.fieldId) << " "
+                   << Dbg::GetFieldName(basket.fieldId);
+        VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+      }
 
       pReq = eventPrep();
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
-      for (int i = 0; i < match_count; i++) {
+      for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
@@ -984,30 +1006,31 @@
 
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
-  int match_count = 0;
+  size_t match_count = 0;
   {
     // Don't allow the list to be updated while we scan it.
     MutexLock mu(Thread::Current(), event_list_lock_);
     JdwpEvent** match_list = AllocMatchList(event_list_size_);
 
     if (start) {
-      FindMatchingEvents(EK_THREAD_START, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_THREAD_START, basket, match_list, &match_count);
     } else {
-      FindMatchingEvents(EK_THREAD_DEATH, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_THREAD_DEATH, basket, match_list, &match_count);
     }
 
     if (match_count != 0) {
-      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ")";
-
       suspend_policy = scanSuspendPolicy(match_list, match_count);
-      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      if (VLOG_IS_ON(jdwp)) {
+        LogMatchingEventsAndThread(match_list, match_count, basket);
+        VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+      }
 
       pReq = eventPrep();
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
-      for (int i = 0; i < match_count; i++) {
+      for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
@@ -1072,33 +1095,35 @@
     return false;
   }
 
-  int match_count = 0;
+  size_t match_count = 0;
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
   {
     MutexLock mu(Thread::Current(), event_list_lock_);
     JdwpEvent** match_list = AllocMatchList(event_list_size_);
-    FindMatchingEvents(EK_EXCEPTION, &basket, match_list, &match_count);
+    FindMatchingEvents(EK_EXCEPTION, basket, match_list, &match_count);
     if (match_count != 0) {
-      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total)"
-                 << StringPrintf(" thread=%#" PRIx64, basket.threadId)
-                 << StringPrintf(" exceptId=%#" PRIx64, exceptionId)
-                 << " caught=" << basket.caught << ")"
-                 << "  throw: " << *pThrowLoc;
-      if (pCatchLoc->class_id == 0) {
-        VLOG(jdwp) << "  catch: (not caught)";
-      } else {
-        VLOG(jdwp) << "  catch: " << *pCatchLoc;
-      }
-
       suspend_policy = scanSuspendPolicy(match_list, match_count);
-      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      if (VLOG_IS_ON(jdwp)) {
+        LogMatchingEventsAndThread(match_list, match_count, basket);
+        VLOG(jdwp) << "  throwLocation=" << *pThrowLoc;
+        if (pCatchLoc->class_id == 0) {
+          VLOG(jdwp) << "  catchLocation=uncaught";
+        } else {
+          VLOG(jdwp) << "  catchLocation=" << *pCatchLoc;
+        }
+        VLOG(jdwp) << StringPrintf("  this=%#" PRIx64, basket.thisPtr);
+        VLOG(jdwp) << StringPrintf("  exceptionClass=%#" PRIx64, basket.excepClassId) << " "
+                   << Dbg::GetClassName(basket.excepClassId);
+        VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+      }
 
       pReq = eventPrep();
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
-      for (int i = 0; i < match_count; i++) {
+      for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
@@ -1142,17 +1167,19 @@
 
   ExpandBuf* pReq = NULL;
   JdwpSuspendPolicy suspend_policy = SP_NONE;
-  int match_count = 0;
+  size_t match_count = 0;
   {
     MutexLock mu(Thread::Current(), event_list_lock_);
     JdwpEvent** match_list = AllocMatchList(event_list_size_);
-    FindMatchingEvents(EK_CLASS_PREPARE, &basket, match_list, &match_count);
+    FindMatchingEvents(EK_CLASS_PREPARE, basket, match_list, &match_count);
     if (match_count != 0) {
-      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ") " << signature;
-
       suspend_policy = scanSuspendPolicy(match_list, match_count);
-      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      if (VLOG_IS_ON(jdwp)) {
+        LogMatchingEventsAndThread(match_list, match_count, basket);
+        VLOG(jdwp) << StringPrintf("  type=%#" PRIx64, basket.classId)<< " " << signature;
+        VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+      }
 
       if (basket.threadId == debug_thread_id_) {
         /*
@@ -1171,7 +1198,7 @@
       expandBufAdd1(pReq, suspend_policy);
       expandBufAdd4BE(pReq, match_count);
 
-      for (int i = 0; i < match_count; i++) {
+      for (size_t i = 0; i < match_count; i++) {
         expandBufAdd1(pReq, match_list[i]->eventKind);
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 66406bf..513b409 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -135,6 +135,8 @@
   mirror::ArtMethod* method = nullptr;
   if (is_static) {
     method = c->FindDirectMethod(name, sig);
+  } else if (c->IsInterface()) {
+    method = c->FindInterfaceMethod(name, sig);
   } else {
     method = c->FindVirtualMethod(name, sig);
     if (method == nullptr) {
@@ -357,8 +359,9 @@
         jni_on_load_result_(kPending) {
   }
 
-  mirror::Object* GetClassLoader() {
-    return class_loader_;
+  mirror::Object* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object** root = &class_loader_;
+    return ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(root);
   }
 
   std::string GetPath() {
@@ -3158,9 +3161,7 @@
   while (UNLIKELY(!allow_new_weak_globals_)) {
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
-  // The weak globals do need a read barrier as they are weak roots.
-  mirror::Object* obj = weak_globals_.Get<kWithReadBarrier>(ref);
-  return obj;
+  return weak_globals_.Get(ref);
 }
 
 void JavaVMExt::DumpReferenceTables(std::ostream& os) {
@@ -3312,8 +3313,7 @@
 void* JavaVMExt::FindCodeForNativeMethod(mirror::ArtMethod* m) {
   CHECK(m->IsNative());
   mirror::Class* c = m->GetDeclaringClass();
-  // If this is a static method, it could be called before the class
-  // has been initialized.
+  // If this is a static method, it could be called before the class has been initialized.
   if (m->IsStatic()) {
     c = EnsureInitialized(Thread::Current(), c);
     if (c == nullptr) {
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index f182e95..5e46c57 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -268,32 +268,38 @@
   jclass jlobject = env_->FindClass("java/lang/Object");
   jclass jlstring = env_->FindClass("java/lang/String");
   jclass jlnsme = env_->FindClass("java/lang/NoSuchMethodError");
+  jclass jncrbc = env_->FindClass("java/nio/channels/ReadableByteChannel");
 
-  // Sanity check that no exceptions are pending
+  // Sanity check that no exceptions are pending.
   ASSERT_FALSE(env_->ExceptionCheck());
 
   // Check that java.lang.Object.foo() doesn't exist and NoSuchMethodError is
-  // a pending exception
+  // a pending exception.
   jmethodID method = env_->GetMethodID(jlobject, "foo", "()V");
   EXPECT_EQ(nullptr, method);
   ExpectException(jlnsme);
 
-  // Check that java.lang.Object.equals() does exist
+  // Check that java.lang.Object.equals() does exist.
   method = env_->GetMethodID(jlobject, "equals", "(Ljava/lang/Object;)Z");
   EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Check that GetMethodID for java.lang.String.valueOf(int) fails as the
-  // method is static
+  // method is static.
   method = env_->GetMethodID(jlstring, "valueOf", "(I)Ljava/lang/String;");
   EXPECT_EQ(nullptr, method);
   ExpectException(jlnsme);
 
-  // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor
+  // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor.
   method = env_->GetMethodID(jlnsme, "<init>", "(Ljava/lang/String;)V");
   EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
 
+  // Check that GetMethodID can find an interface method inherited from another interface.
+  method = env_->GetMethodID(jncrbc, "close", "()V");
+  EXPECT_NE(nullptr, method);
+  EXPECT_FALSE(env_->ExceptionCheck());
+
   // Bad arguments.
   CheckJniAbortCatcher check_jni_abort_catcher;
   method = env_->GetMethodID(nullptr, "<init>", "(Ljava/lang/String;)V");
@@ -630,11 +636,13 @@
   jni_abort_catcher.Check(
       "attempt to get double primitive array elements with an object of type boolean[]");
   jbyteArray array2 = env_->NewByteArray(10);
-  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy), nullptr);
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy),
+            nullptr);
   jni_abort_catcher.Check(
       "attempt to get boolean primitive array elements with an object of type byte[]");
   jobject object = env_->NewStringUTF("Test String");
-  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy), nullptr);
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy),
+            nullptr);
   jni_abort_catcher.Check(
       "attempt to get boolean primitive array elements with an object of type java.lang.String");
 }
@@ -681,7 +689,8 @@
   jobject object = env_->NewStringUTF("Test String");
   env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), elements, 0);
   jni_abort_catcher.Check(
-      "attempt to release boolean primitive array elements with an object of type java.lang.String");
+      "attempt to release boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 TEST_F(JniInternalTest, GetReleasePrimitiveArrayCriticalOfWrongType) {
   CheckJniAbortCatcher jni_abort_catcher;
@@ -736,7 +745,8 @@
   env_->GetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
                               reinterpret_cast<jboolean*>(elements));
   jni_abort_catcher.Check(
-      "attempt to get region of boolean primitive array elements with an object of type java.lang.String");
+      "attempt to get region of boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 
 TEST_F(JniInternalTest, SetPrimitiveArrayRegionElementsOfWrongType) {
@@ -782,7 +792,8 @@
   env_->SetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
                               reinterpret_cast<jboolean*>(elements));
   jni_abort_catcher.Check(
-      "attempt to set region of boolean primitive array elements with an object of type java.lang.String");
+      "attempt to set region of boolean primitive array elements with an object of type "
+      "java.lang.String");
 }
 
 TEST_F(JniInternalTest, NewObjectArray) {
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 81a8623..8d987df 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -34,6 +34,13 @@
 
 #ifdef USE_ASHMEM
 #include <cutils/ashmem.h>
+#ifndef HAVE_ANDROID_OS
+#include <sys/resource.h>  // getrlimit(), needed by the non-Android ashmem fallback check.
+#endif
+#endif
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
 #endif
 
 namespace art {
@@ -179,20 +186,32 @@
   }
   size_t page_aligned_byte_count = RoundUp(byte_count, kPageSize);
 
-#ifdef USE_ASHMEM
-  // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
-  // prefixed "dalvik-".
-  std::string debug_friendly_name("dalvik-");
-  debug_friendly_name += name;
-  ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count));
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
-    return nullptr;
-  }
-  int flags = MAP_PRIVATE;
-#else
-  ScopedFd fd(-1);
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+  ScopedFd fd(-1);
+
+#ifdef USE_ASHMEM
+#ifdef HAVE_ANDROID_OS
+  const bool use_ashmem = true;
+#else
+  // When not on Android, ashmem is faked using files in /tmp. Ensure that creating such files
+  // won't fail due to ulimit restrictions; if it would, use a regular anonymous mmap instead.
+  struct rlimit rlimit_fsize;
+  CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
+  const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
+      (page_aligned_byte_count < rlimit_fsize.rlim_cur);
+#endif
+  if (use_ashmem) {
+    // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
+    // prefixed "dalvik-".
+    std::string debug_friendly_name("dalvik-");
+    debug_friendly_name += name;
+    fd.reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count));
+    if (fd.get() == -1) {
+      *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
+      return nullptr;
+    }
+    flags = MAP_PRIVATE;
+  }
 #endif
 
   // We need to store and potentially set an error number for pretty printing of errors
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 512a66f..6205f70 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -505,8 +505,10 @@
 
 template <bool kVisitClass, typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  // Visit the static fields first so that we don't overwrite the SFields / IFields instance
+  // fields.
   VisitStaticFieldsReferences<kVisitClass>(this, visitor);
+  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
 }
 
 inline bool Class::IsArtFieldClass() const {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index a20f7b9..c798180 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -37,17 +37,17 @@
 namespace art {
 namespace mirror {
 
-Class* Class::java_lang_Class_ = NULL;
+Class* Class::java_lang_Class_ = nullptr;
 
 void Class::SetClassClass(Class* java_lang_Class) {
-  CHECK(java_lang_Class_ == NULL) << java_lang_Class_ << " " << java_lang_Class;
-  CHECK(java_lang_Class != NULL);
+  CHECK(java_lang_Class_ == nullptr) << java_lang_Class_ << " " << java_lang_Class;
+  CHECK(java_lang_Class != nullptr);
   java_lang_Class_ = java_lang_Class;
 }
 
 void Class::ResetClass() {
-  CHECK(java_lang_Class_ != NULL);
-  java_lang_Class_ = NULL;
+  CHECK(java_lang_Class_ != nullptr);
+  java_lang_Class_ = nullptr;
 }
 
 void Class::VisitRoots(RootCallback* callback, void* arg) {
@@ -146,7 +146,7 @@
   if ((descriptor[0] != 'L') && (descriptor[0] != '[')) {
     // The descriptor indicates that this is the class for
     // a primitive type; special-case the return value.
-    const char* c_name = NULL;
+    const char* c_name = nullptr;
     switch (descriptor[0]) {
     case 'Z': c_name = "boolean"; break;
     case 'B': c_name = "byte";    break;
@@ -196,10 +196,10 @@
   os << "----- " << (IsInterface() ? "interface" : "class") << " "
      << "'" << GetDescriptor() << "' cl=" << GetClassLoader() << " -----\n",
   os << "  objectSize=" << SizeOf() << " "
-     << "(" << (h_super.Get() != NULL ? h_super->SizeOf() : -1) << " from super)\n",
+     << "(" << (h_super.Get() != nullptr ? h_super->SizeOf() : -1) << " from super)\n",
   os << StringPrintf("  access=0x%04x.%04x\n",
       GetAccessFlags() >> 16, GetAccessFlags() & kAccJavaFlagsMask);
-  if (h_super.Get() != NULL) {
+  if (h_super.Get() != nullptr) {
     os << "  super='" << PrettyClass(h_super.Get()) << "' (cl=" << h_super->GetClassLoader()
        << ")\n";
   }
@@ -217,7 +217,7 @@
   }
   // After this point, this may have moved due to GetDirectInterface.
   os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
-     << (h_super.Get() != NULL ? h_super->NumVirtualMethods() : 0) << " in super):\n";
+     << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n";
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     os << StringPrintf("    %2zd: %s\n", i,
                        PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str());
@@ -253,7 +253,7 @@
     // Sanity check that the number of bits set in the reference offset bitmap
     // agrees with the number of references
     size_t count = 0;
-    for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+    for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
       count += c->NumReferenceInstanceFieldsDuringLinking();
     }
     CHECK_EQ((size_t)POPCOUNT(new_reference_offsets), count);
@@ -329,40 +329,58 @@
   }
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
-  if (method != NULL) {
+  if (method != nullptr) {
     return method;
   }
 
   int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
+  for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
+}
+
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
+  // Check the current class before checking the interfaces.
+  ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
+  if (method != nullptr) {
+    return method;
+  }
+
+  int32_t iftable_count = GetIfTableCount();
+  IfTable* iftable = GetIfTable();
+  for (int32_t i = 0; i < iftable_count; ++i) {
+    method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature);
+    if (method != nullptr) {
+      return method;
+    }
+  }
+  return nullptr;
 }
 
 ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-  if (method != NULL) {
+  if (method != nullptr) {
     return method;
   }
 
   int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
+  for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) {
@@ -372,7 +390,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) {
@@ -382,7 +400,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
@@ -394,37 +412,37 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) {
@@ -434,7 +452,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) {
@@ -444,7 +462,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
@@ -456,37 +474,37 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
-  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
-    if (method != NULL) {
+    if (method != nullptr) {
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtMethod* Class::FindClassInitializer() {
@@ -498,7 +516,7 @@
       return method;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
@@ -510,7 +528,7 @@
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
@@ -522,42 +540,42 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(name, type);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
+  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(dex_cache, dex_field_idx);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) {
-  DCHECK(type != NULL);
+  DCHECK(type != nullptr);
   for (size_t i = 0; i < NumStaticFields(); ++i) {
     ArtField* f = GetStaticField(i);
     if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
@@ -569,7 +587,7 @@
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
@@ -603,7 +621,7 @@
   for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
     // Wrap k incase it moves during GetDirectInterface.
@@ -625,7 +643,7 @@
 ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
                            const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  for (Class* k = klass.Get(); k != NULL; k = k->GetSuperClass()) {
+  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
     if (f != nullptr) {
@@ -652,10 +670,10 @@
 
 static void SetPreverifiedFlagOnMethods(mirror::ObjectArray<mirror::ArtMethod>* methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (methods != NULL) {
+  if (methods != nullptr) {
     for (int32_t index = 0, end = methods->GetLength(); index < end; ++index) {
       mirror::ArtMethod* method = methods->GetWithoutChecks(index);
-      DCHECK(method != NULL);
+      DCHECK(method != nullptr);
       if (!method->IsNative() && !method->IsAbstract()) {
         method->SetPreverified();
       }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 90381a7..c83f411 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -648,6 +648,9 @@
   ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/iftable-inl.h b/runtime/mirror/iftable-inl.h
index ec3e514..3f20bf4 100644
--- a/runtime/mirror/iftable-inl.h
+++ b/runtime/mirror/iftable-inl.h
@@ -25,8 +25,9 @@
 inline void IfTable::SetInterface(int32_t i, Class* interface) {
   DCHECK(interface != NULL);
   DCHECK(interface->IsInterface());
-  DCHECK(Get((i * kMax) + kInterface) == NULL);
-  Set<false>((i * kMax) + kInterface, interface);
+  const size_t idx = i * kMax + kInterface;
+  DCHECK_EQ(Get(idx), static_cast<Object*>(nullptr));
+  Set<false>(idx, interface);
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 567ce3e..15ecd3c 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -26,6 +26,7 @@
 #include "class.h"
 #include "lock_word-inl.h"
 #include "monitor.h"
+#include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "runtime.h"
 #include "reference.h"
@@ -667,10 +668,9 @@
         mirror::ArtField* field = kIsStatic ? klass->GetStaticField(i) : klass->GetInstanceField(i);
         MemberOffset field_offset = field->GetOffset();
         // TODO: Do a simpler check?
-        if (!kVisitClass && UNLIKELY(field_offset.Uint32Value() == ClassOffset().Uint32Value())) {
-          continue;
+        if (kVisitClass || field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
+          visitor(this, field_offset, kIsStatic);
         }
-        visitor(this, field_offset, kIsStatic);
       }
     }
   }
@@ -693,18 +693,16 @@
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
   mirror::Class* klass = GetClass<kVerifyFlags>();
-  if (klass->IsVariableSize()) {
-    if (klass->IsClassClass()) {
-      AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
-    } else {
-      DCHECK(klass->IsArrayClass<kVerifyFlags>());
-      if (klass->IsObjectArrayClass<kVerifyNone>()) {
-        AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
-      } else if (kVisitClass) {
-        visitor(this, ClassOffset(), false);
-      }
+  if (klass == Class::GetJavaLangClass()) {
+    AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
+  } else if (klass->IsArrayClass()) {
+    if (klass->IsObjectArrayClass<kVerifyNone>()) {
+      AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
+    } else if (kVisitClass) {
+      visitor(this, ClassOffset(), false);
     }
   } else {
+    DCHECK(!klass->IsVariableSize());
     VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
     if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
       ref_visitor(klass, AsReference());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 87106d6..7cdd8f5 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -113,6 +113,8 @@
     return gc::kCollectorTypeGSS;
   } else if (option == "CC") {
     return gc::kCollectorTypeCC;
+  } else if (option == "MC") {
+    return gc::kCollectorTypeMC;
   } else {
     return gc::kCollectorTypeNone;
   }
@@ -563,6 +565,10 @@
       if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_change_threshold_)) {
         return false;
       }
+    } else if (option == "-Xprofile-type:method") {
+      profiler_options_.profile_type_ = kProfilerMethod;
+    } else if (option == "-Xprofile-type:dexpc") {
+      profiler_options_.profile_type_ = kProfilerMethodAndDexPC;
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -806,6 +812,7 @@
   UsageMessage(stream, "  -Xprofile-start-immediately\n");
   UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
   UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
+  UsageMessage(stream, "  -Xprofile-type:{method,dexpc}\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 00bb501..2cd876a 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -63,7 +63,8 @@
 static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   BackgroundMethodSamplingProfiler* profiler =
       reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
+  uint32_t dex_pc = 0;  // Keep the value defined in case GetCurrentMethod() does not set it.
+  mirror::ArtMethod* method = thread->GetCurrentMethod(&dex_pc);
   if (false && method == nullptr) {
     LOG(INFO) << "No current method available";
     std::ostringstream os;
@@ -71,7 +72,7 @@
     std::string data(os.str());
     LOG(INFO) << data;
   }
-  profiler->RecordMethod(method);
+  profiler->RecordMethod(method, dex_pc);
 }
 
 // A closure that is called by the thread checkpoint code.
@@ -244,7 +245,7 @@
   }
 
   // Read the previous profile.
-  profile_table_.ReadPrevious(fd);
+  profile_table_.ReadPrevious(fd, options_.GetProfileType());
 
   // Move back to the start of the file.
   lseek(fd, 0, SEEK_SET);
@@ -360,7 +361,7 @@
 
 // A method has been hit, record its invocation in the method map.
 // The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method) {
+void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method, uint32_t dex_pc) {
   if (method == nullptr) {
     profile_table_.NullMethod();
     // Don't record a nullptr method.
@@ -393,7 +394,11 @@
 
   // Add to the profile table unless it is filtered out.
   if (!is_filtered) {
-    profile_table_.Put(method);
+    if (options_.GetProfileType() == kProfilerMethod) {
+      profile_table_.Put(method);
+    } else if (options_.GetProfileType() == kProfilerMethodAndDexPC) {
+      profile_table_.PutDexPC(method, dex_pc);
+    }
   }
 }
 
@@ -403,7 +408,7 @@
 }
 
 uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os);
+  return profile_table_.Write(os, options_.GetProfileType());
 }
 
 // Profile Table.
@@ -414,19 +419,18 @@
     num_boot_methods_(0) {
   for (int i = 0; i < kHashSize; i++) {
     table[i] = nullptr;
+    dex_table[i] = nullptr;
   }
 }
 
 ProfileSampleResults::~ProfileSampleResults() {
-  for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-  }
+  Clear();
 }
 
 // Add a method to the profile table.  If it's the first time the method
 // has been seen, add it with count=1, otherwise increment the count.
 void ProfileSampleResults::Put(mirror::ArtMethod* method) {
-  lock_.Lock(Thread::Current());
+  MutexLock mu(Thread::Current(), lock_);
   uint32_t index = Hash(method);
   if (table[index] == nullptr) {
     table[index] = new Map();
@@ -438,11 +442,34 @@
     i->second++;
   }
   num_samples_++;
-  lock_.Unlock(Thread::Current());
+}
+
+// Add a sample for a method at the given dex pc to the profile table, creating entries as needed.
+void ProfileSampleResults::PutDexPC(mirror::ArtMethod* method, uint32_t dex_pc) {
+  MutexLock mu(Thread::Current(), lock_);
+  uint32_t index = Hash(method);
+  if (dex_table[index] == nullptr) {
+    dex_table[index] = new MethodDexPCMap();
+  }
+  MethodDexPCMap::iterator i = dex_table[index]->find(method);
+  if (i == dex_table[index]->end()) {
+    DexPCCountMap* dex_pc_map = new DexPCCountMap();
+    (*dex_pc_map)[dex_pc] = 1;
+    (*dex_table[index])[method] = dex_pc_map;
+  } else {
+    DexPCCountMap* dex_pc_count = i->second;
+    DexPCCountMap::iterator dex_pc_i = dex_pc_count->find(dex_pc);
+    if (dex_pc_i == dex_pc_count->end()) {
+      (*dex_pc_count)[dex_pc] = 1;
+    } else {
+      dex_pc_i->second++;
+    }
+  }
+  num_samples_++;
 }
 
 // Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream &os) {
+uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
   ScopedObjectAccess soa(Thread::Current());
   num_samples_ += previous_num_samples_;
   num_null_methods_ += previous_num_null_methods_;
@@ -452,36 +479,101 @@
                  << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
-  for (int i = 0 ; i < kHashSize; i++) {
-    Map *map = table[i];
-    if (map != nullptr) {
-      for (const auto &meth_iter : *map) {
-        mirror::ArtMethod *method = meth_iter.first;
-        std::string method_name = PrettyMethod(method);
+  if (type == kProfilerMethod) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      Map *map = table[i];
+      if (map != nullptr) {
+        for (const auto &meth_iter : *map) {
+          mirror::ArtMethod *method = meth_iter.first;
+          std::string method_name = PrettyMethod(method);
 
-        const DexFile::CodeItem* codeitem = method->GetCodeItem();
-        uint32_t method_size = 0;
-        if (codeitem != nullptr) {
-          method_size = codeitem->insns_size_in_code_units_;
-        }
-        uint32_t count = meth_iter.second;
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          uint32_t count = meth_iter.second;
 
-        // Merge this profile entry with one from a previous run (if present).  Also
-        // remove the previous entry.
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          count += pi->second.count_;
-          previous_.erase(pi);
+          // Merge this profile entry with one from a previous run (if present).  Also
+          // remove the previous entry.
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            count += pi->second.count_;
+            previous_.erase(pi);
+          }
+          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
+          ++num_methods;
         }
-        os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-        ++num_methods;
+      }
+    }
+  } else if (type == kProfilerMethodAndDexPC) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      MethodDexPCMap *dex_map = dex_table[i];
+      if (dex_map != nullptr) {
+        for (const auto &dex_pc_iter : *dex_map) {
+          mirror::ArtMethod *method = dex_pc_iter.first;
+          std::string method_name = PrettyMethod(method);
+
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          DexPCCountMap* dex_pc_map = dex_pc_iter.second;
+          uint32_t total_count = 0;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            total_count += dex_pc_i.second;
+          }
+
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            total_count += pi->second.count_;
+            DexPCCountMap* previous_dex_pc_map = pi->second.dex_pc_map_;
+            if (previous_dex_pc_map != nullptr) {
+              for (const auto &dex_pc_i : *previous_dex_pc_map) {
+                uint32_t dex_pc = dex_pc_i.first;
+                uint32_t count = dex_pc_i.second;
+                DexPCCountMap::iterator di = dex_pc_map->find(dex_pc);
+                if (di == dex_pc_map->end()) {
+                  (*dex_pc_map)[dex_pc] = count;
+                } else {
+                  di->second += count;
+                }
+              }
+            }
+            delete previous_dex_pc_map;
+            previous_.erase(pi);
+          }
+          std::vector<std::string> dex_pc_count_vector;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+          }
+          // We write out profile data with dex pc information in the following format:
+          // "method/total_count/size/[pc_1:count_1,pc_2:count_2,...]".
+          os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
+              method_size, Join(dex_pc_count_vector, ',').c_str());
+          ++num_methods;
+        }
       }
     }
   }
 
   // Now we write out the remaining previous methods.
-  for (PreviousProfile::iterator pi = previous_.begin(); pi != previous_.end(); ++pi) {
-    os << StringPrintf("%s/%u/%u\n",  pi->first.c_str(), pi->second.count_, pi->second.method_size_);
+  for (const auto &pi : previous_) {
+    if (type == kProfilerMethod) {
+      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+    } else if (type == kProfilerMethodAndDexPC) {
+      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+      DexPCCountMap* previous_dex_pc_map = pi.second.dex_pc_map_;
+      if (previous_dex_pc_map != nullptr) {
+        std::vector<std::string> dex_pc_count_vector;
+        for (const auto &dex_pc_i : *previous_dex_pc_map) {
+          dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+        }
+        os << Join(dex_pc_count_vector, ',');
+      }
+      os << "]\n";
+    }
     ++num_methods;
   }
   return num_methods;
@@ -492,8 +584,20 @@
   num_null_methods_ = 0;
   num_boot_methods_ = 0;
   for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-     table[i] = nullptr;
+    delete table[i];
+    table[i] = nullptr;
+    if (dex_table[i] != nullptr) {
+      for (auto &di : *dex_table[i]) {
+        delete di.second;
+        di.second = nullptr;
+      }
+    }
+    delete dex_table[i];
+    dex_table[i] = nullptr;
+  }
+  for (auto &pi : previous_) {
+    delete pi.second.dex_pc_map_;
+    pi.second.dex_pc_map_ = nullptr;
   }
   previous_.clear();
 }
@@ -520,7 +624,7 @@
   return true;
 }
 
-void ProfileSampleResults::ReadPrevious(int fd) {
+void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
   // Reset counters.
   previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
 
@@ -540,21 +644,35 @@
   previous_num_null_methods_ = atoi(summary_info[1].c_str());
   previous_num_boot_methods_ = atoi(summary_info[2].c_str());
 
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by /
+  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
   while (true) {
     if (!ReadProfileLine(fd, line)) {
       break;
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       break;
     }
     std::string methodname = info[0];
-    uint32_t count = atoi(info[1].c_str());
+    uint32_t total_count = atoi(info[1].c_str());
     uint32_t size = atoi(info[2].c_str());
-    previous_[methodname] = PreviousValue(count, size);
+    DexPCCountMap* dex_pc_map = nullptr;  // Stays null for method-only profiles.
+    if (type == kProfilerMethodAndDexPC && info.size() == 4) {
+      dex_pc_map = new DexPCCountMap();
+      std::string dex_pc_counts_str = info[3].substr(1, info[3].size() - 2);  // Drop [ and ].
+      std::vector<std::string> dex_pc_count_pairs;
+      Split(dex_pc_counts_str, ',', dex_pc_count_pairs);
+      for (uint32_t i = 0; i < dex_pc_count_pairs.size(); ++i) {
+        std::vector<std::string> dex_pc_count;
+        Split(dex_pc_count_pairs[i], ':', dex_pc_count);
+        if (dex_pc_count.size() != 2) { continue; }  // Malformed "pc:count" pair: skip it.
+        uint32_t dex_pc = atoi(dex_pc_count[0].c_str());
+        (*dex_pc_map)[dex_pc] = atoi(dex_pc_count[1].c_str());
+      }
+    }
+    previous_[methodname] = PreviousValue(total_count, size, dex_pc_map);
   }
 }
 
@@ -604,7 +722,7 @@
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       return false;
     }
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 0b18dbb..396dd23 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -53,8 +53,9 @@
   ~ProfileSampleResults();
 
   void Put(mirror::ArtMethod* method);
-  uint32_t Write(std::ostream &os);
-  void ReadPrevious(int fd);
+  void PutDexPC(mirror::ArtMethod* method, uint32_t pc);
+  uint32_t Write(std::ostream &os, ProfileDataType type);
+  void ReadPrevious(int fd, ProfileDataType type);
   void Clear();
   uint32_t GetNumSamples() { return num_samples_; }
   void NullMethod() { ++num_null_methods_; }
@@ -68,15 +69,21 @@
   uint32_t num_null_methods_;    // Number of samples where can don't know the method.
   uint32_t num_boot_methods_;    // Number of samples in the boot path.
 
-  typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
+  typedef std::map<mirror::ArtMethod*, uint32_t> Map;  // Map of method vs its count.
   Map *table[kHashSize];
 
+  typedef std::map<uint32_t, uint32_t> DexPCCountMap;  // Map of dex pc vs its count
+  // Map of method vs dex pc counts in the method.
+  typedef std::map<mirror::ArtMethod*, DexPCCountMap*> MethodDexPCMap;
+  MethodDexPCMap *dex_table[kHashSize];
+
   struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0) {}
-    PreviousValue(uint32_t count, uint32_t method_size)
-      : count_(count), method_size_(method_size) {}
+    PreviousValue() : count_(0), method_size_(0), dex_pc_map_(nullptr) {}
+    PreviousValue(uint32_t count, uint32_t method_size, DexPCCountMap* dex_pc_map)
+      : count_(count), method_size_(method_size), dex_pc_map_(dex_pc_map) {}
     uint32_t count_;
     uint32_t method_size_;
+    DexPCCountMap* dex_pc_map_;  // May be null. Not freed by this struct; users delete it.
   };
 
   typedef std::map<std::string, PreviousValue> PreviousProfile;
@@ -114,7 +121,7 @@
   static void Stop() LOCKS_EXCLUDED(Locks::profiler_lock_, wait_lock_);
   static void Shutdown() LOCKS_EXCLUDED(Locks::profiler_lock_);
 
-  void RecordMethod(mirror::ArtMethod *method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RecordMethod(mirror::ArtMethod *method, uint32_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Barrier& GetBarrier() {
     return *profiler_barrier_;
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
index 08e32cc..0b63003 100644
--- a/runtime/profiler_options.h
+++ b/runtime/profiler_options.h
@@ -22,6 +22,11 @@
 
 namespace art {
 
+enum ProfileDataType {
+  kProfilerMethod,          // Per-method sample counts only.
+  kProfilerMethodAndDexPC,  // Per-method counts plus a count for each sampled dex pc.
+};
+
 class ProfilerOptions {
  public:
   static constexpr bool kDefaultEnabled = false;
@@ -32,6 +37,7 @@
   static constexpr bool kDefaultStartImmediately = false;
   static constexpr double kDefaultTopKThreshold = 90.0;
   static constexpr double kDefaultChangeInTopKThreshold = 10.0;
+  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
 
   ProfilerOptions() :
     enabled_(kDefaultEnabled),
@@ -41,7 +47,8 @@
     backoff_coefficient_(kDefaultBackoffCoefficient),
     start_immediately_(kDefaultStartImmediately),
     top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold) {}
+    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
+    profile_type_(kDefaultProfileData) {}
 
   ProfilerOptions(bool enabled,
                  uint32_t period_s,
@@ -50,7 +57,8 @@
                  double backoff_coefficient,
                  bool start_immediately,
                  double top_k_threshold,
-                 double top_k_change_threshold):
+                 double top_k_change_threshold,
+                 ProfileDataType profile_type):
     enabled_(enabled),
     period_s_(period_s),
     duration_s_(duration_s),
@@ -58,7 +66,8 @@
     backoff_coefficient_(backoff_coefficient),
     start_immediately_(start_immediately),
     top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold) {}
+    top_k_change_threshold_(top_k_change_threshold),
+    profile_type_(profile_type) {}
 
   bool IsEnabled() const {
     return enabled_;
@@ -92,6 +101,10 @@
     return top_k_change_threshold_;
   }
 
+  ProfileDataType GetProfileType() const {
+    return profile_type_;
+  }
+
  private:
   friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
     os << "enabled=" << po.enabled_
@@ -101,7 +114,8 @@
        << ", backoff_coefficient=" << po.backoff_coefficient_
        << ", start_immediately=" << po.start_immediately_
        << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_;
+       << ", top_k_change_threshold=" << po.top_k_change_threshold_
+       << ", profile_type=" << po.profile_type_;
     return os;
   }
 
@@ -123,6 +137,8 @@
   double top_k_threshold_;
   // How much the top K% samples needs to change in order for the app to be recompiled.
   double top_k_change_threshold_;
+  // The type of profile data dumped to the disk.
+  ProfileDataType profile_type_;
 };
 
 }  // namespace art
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 451d13c..ed5db4e 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -33,11 +33,15 @@
 
 class ReadBarrier {
  public:
+  // The implementation may or may not update the given field, but
+  // the returned value is always an up-to-date reference.
   template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // The implementation may or may not update the given root, but
+  // the returned value is always an up-to-date reference.
   template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 11527fa..cd35863 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -24,6 +24,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string-inl.h"
+#include "read_barrier.h"
 #include "thread.h"
 #include "utils.h"
 
@@ -51,7 +52,9 @@
 void ReferenceTable::Remove(mirror::Object* obj) {
   // We iterate backwards on the assumption that references are LIFO.
   for (int i = entries_.size() - 1; i >= 0; --i) {
-    if (entries_[i] == obj) {
+    mirror::Object* entry =
+        ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(&entries_[i]);
+    if (entry == obj) {
       entries_.erase(entries_.begin() + i);
       return;
     }
@@ -140,12 +143,12 @@
   return entries_.size();
 }
 
-void ReferenceTable::Dump(std::ostream& os) const {
+void ReferenceTable::Dump(std::ostream& os) {
   os << name_ << " reference table dump:\n";
   Dump(os, entries_);
 }
 
-void ReferenceTable::Dump(std::ostream& os, const Table& entries) {
+void ReferenceTable::Dump(std::ostream& os, Table& entries) {
   if (entries.empty()) {
     os << "  (empty)\n";
     return;
@@ -160,7 +163,8 @@
   }
   os << "  Last " << (count - first) << " entries (of " << count << "):\n";
   for (int idx = count - 1; idx >= first; --idx) {
-    mirror::Object* ref = entries[idx];
+    mirror::Object* ref =
+        ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(&entries[idx]);
     if (ref == NULL) {
       continue;
     }
@@ -194,7 +198,12 @@
   }
 
   // Make a copy of the table and sort it.
-  Table sorted_entries(entries.begin(), entries.end());
+  Table sorted_entries;
+  sorted_entries.reserve(entries.size());  // Final size is known; allocate once.
+  for (size_t i = 0; i < entries.size(); ++i) {
+    sorted_entries.push_back(  // Copy the reference as seen through the read barrier.
+        ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(&entries[i]));
+  }
   std::sort(sorted_entries.begin(), sorted_entries.end(), ObjectComparator());
 
   // Remove any uninteresting stuff from the list. The sort moved them all to the end.
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 45309c9..1cd0999 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -39,19 +39,19 @@
   ReferenceTable(const char* name, size_t initial_size, size_t max_size);
   ~ReferenceTable();
 
-  void Add(mirror::Object* obj);
+  void Add(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Remove(mirror::Object* obj);
+  void Remove(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t Size() const;
 
-  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VisitRoots(RootCallback* visitor, void* arg, uint32_t tid, RootType root_type);
 
  private:
   typedef std::vector<mirror::Object*> Table;
-  static void Dump(std::ostream& os, const Table& entries)
+  static void Dump(std::ostream& os, Table& entries)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   friend class IndirectReferenceTable;  // For Dump.
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 717381c..8aa7ea1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -421,6 +421,9 @@
     int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
     if (fd >= 0) {
       close(fd);
+    } else if (errno != EEXIST) {
+      LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
+      return true;
     }
     StartProfiler(profile_output_filename_.c_str());
   }
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 7ce68c6..d691623 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -146,7 +146,8 @@
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
-    return reinterpret_cast<mirror::ArtField*>(fid);
+    mirror::ArtField* field = reinterpret_cast<mirror::ArtField*>(fid);
+    return ReadBarrier::BarrierForRoot<mirror::ArtField, kWithReadBarrier>(&field);
   }
 
   jfieldID EncodeField(mirror::ArtField* field) const
@@ -162,7 +163,8 @@
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
-    return reinterpret_cast<mirror::ArtMethod*>(mid);
+    mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(mid);
+    return ReadBarrier::BarrierForRoot<mirror::ArtMethod, kWithReadBarrier>(&method);
   }
 
   jmethodID EncodeMethod(mirror::ArtMethod* method) const
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530..e5ae6d0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += Thread::kStackOverflowReservedBytes;
+    stack_size += kRuntimeStackOverflowReservedBytes;
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -489,7 +489,7 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kStackOverflowReservedBytes) {
+  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
@@ -1298,9 +1298,7 @@
     }
   } else if (kind == kGlobal) {
     JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-    // Strong global references do not need a read barrier.
-    result = vm->globals.SynchronizedGet<kWithoutReadBarrier>(
-        const_cast<Thread*>(this), &vm->globals_lock, ref);
+    result = vm->globals.SynchronizedGet(const_cast<Thread*>(this), &vm->globals_lock, ref);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
     result = Runtime::Current()->GetJavaVM()->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
@@ -2200,7 +2198,7 @@
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kStackOverflowReservedBytes << ")?";
+               << kRuntimeStackOverflowReservedBytes << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b52..7cd86de 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
+#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -94,28 +95,8 @@
 
 class Thread {
  public:
-  // Space to throw a StackOverflowError in.
-  // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
-  // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-  // But this one works rather well.
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
-  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-  // test-art-host-run-test-interpreter-018-stack-overflow
-  // test-art-host-run-test-interpreter-107-int-math2
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
-  static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
   // How much of the reserved bytes is reserved for incoming signals.
   static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-  // How much of the reserved bytes we may temporarily use during stack overflow checks as an
-  // optimization.
-  static constexpr size_t kStackOverflowReservedUsableBytes =
-      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory).  The higher portion of the memory
@@ -123,7 +104,7 @@
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-    kStackOverflowReservedBytes;
+      kRuntimeStackOverflowReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -585,7 +566,7 @@
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
     }
   }
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index c9c3bba..89cfcdd 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -717,13 +717,28 @@
     case Instruction::kVerifySwitchTargets:
       result = result && CheckSwitchTargets(code_offset);
       break;
+    case Instruction::kVerifyVarArgNonZero:
+      // Fall-through.
     case Instruction::kVerifyVarArg: {
+      if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && inst->VRegA() <= 0) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in "
+                                             "non-range invoke";
+        return false;
+      }
       uint32_t args[Instruction::kMaxVarArgRegs];
       inst->GetVarArgs(args);
       result = result && CheckVarArgRegs(inst->VRegA(), args);
       break;
     }
+    case Instruction::kVerifyVarArgRangeNonZero:
+      // Fall-through.
     case Instruction::kVerifyVarArgRange:
+      if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgRangeNonZero &&
+          inst->VRegA() <= 0) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in "
+                                             "range invoke";
+        return false;
+      }
       result = result && CheckVarArgRangeRegs(inst->VRegA(), inst->VRegC());
       break;
     case Instruction::kVerifyError:
@@ -1219,6 +1234,12 @@
         break;
       case 'J':
       case 'D': {
+        if (cur_arg + 1 >= expected_args) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected " << expected_args
+              << " args, found more (" << descriptor << ")";
+          return false;
+        }
+
         const RegType& lo_half = descriptor[0] == 'J' ? reg_types_.LongLo() : reg_types_.DoubleLo();
         const RegType& hi_half = descriptor[0] == 'J' ? reg_types_.LongHi() : reg_types_.DoubleHi();
         reg_line->SetRegisterTypeWide(arg_start + cur_arg, lo_half, hi_half);
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index d21f39b..556056c 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -55,7 +55,7 @@
 
 bool RegisterLine::SetRegisterTypeWide(uint32_t vdst, const RegType& new_type1,
                                        const RegType& new_type2) {
-  DCHECK_LT(vdst, num_regs_);
+  DCHECK_LT(vdst + 1, num_regs_);
   if (!new_type1.CheckWidePair(new_type2)) {
     verifier_->Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Invalid wide pair '"
         << new_type1 << "' '" << new_type2 << "'";
diff --git a/test/003-omnibus-opcodes/build b/test/003-omnibus-opcodes/build
index 9dff837..f909fb2 100644
--- a/test/003-omnibus-opcodes/build
+++ b/test/003-omnibus-opcodes/build
@@ -22,5 +22,5 @@
 rm classes/UnresClass.class
 ${JAVAC} -d classes `find src2 -name '*.java'`
 
-${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
+${DX} -JXmx256m --debug --dex --output=classes.dex classes
 zip $TEST_NAME.jar classes.dex
diff --git a/test/056-const-string-jumbo/build b/test/056-const-string-jumbo/build
index a12c9d3..ef286d1 100644
--- a/test/056-const-string-jumbo/build
+++ b/test/056-const-string-jumbo/build
@@ -42,5 +42,5 @@
 mkdir classes
 ${JAVAC} -d classes src/*.java
 
-${DX} -JXmx500m --debug --dex --no-optimize --positions=none --no-locals --dump-to=classes.lst --output=classes.dex classes
+${DX} -JXmx500m --debug --dex --no-optimize --positions=none --no-locals --output=classes.dex classes
 zip $TEST_NAME.jar classes.dex
diff --git a/test/112-double-math/expected.txt b/test/112-double-math/expected.txt
new file mode 100644
index 0000000..1e10a95
--- /dev/null
+++ b/test/112-double-math/expected.txt
@@ -0,0 +1 @@
+cond_neg_double PASSED
diff --git a/test/112-double-math/info.txt b/test/112-double-math/info.txt
new file mode 100644
index 0000000..a32f4e0
--- /dev/null
+++ b/test/112-double-math/info.txt
@@ -0,0 +1 @@
+This checks the neg_double bytecode.
diff --git a/test/112-double-math/src/Main.java b/test/112-double-math/src/Main.java
new file mode 100644
index 0000000..8172dfa
--- /dev/null
+++ b/test/112-double-math/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2007 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {  // Regression test for the neg_double bytecode (see info.txt).
+    public static double cond_neg_double(double value, boolean cond) {
+        return cond ? -value : value;  // Negation happens only on the cond == true path.
+    }
+
+    public static void main(String args[]) {
+        double result = cond_neg_double(-1.0d, true);  // Expect -(-1.0) == 1.0.
+        // 0x3ff0000000000000L is the IEEE 754 bit pattern of 1.0d; compare raw bits.
+        if (Double.doubleToRawLongBits(result) == 0x3ff0000000000000L) {
+            System.out.println("cond_neg_double PASSED");
+        } else {
+            System.out.println("cond_neg_double FAILED " + result);
+        }
+    }
+}
diff --git a/test/302-float-conversion/expected.txt b/test/302-float-conversion/expected.txt
index 7d5c1eb..0423076 100644
--- a/test/302-float-conversion/expected.txt
+++ b/test/302-float-conversion/expected.txt
@@ -1,2 +1,3 @@
 Iteration Result is as expected
 inter4:2.0
+max_long:9223372036854775807
diff --git a/test/302-float-conversion/src/Main.java b/test/302-float-conversion/src/Main.java
index afc5e97..2733135 100644
--- a/test/302-float-conversion/src/Main.java
+++ b/test/302-float-conversion/src/Main.java
@@ -21,6 +21,7 @@
     public static void main(String args[]) {
         test1();
         test2();
+        test3();
     }
 
     public static void test1() {
@@ -55,4 +56,9 @@
         System.out.println("inter4:" + inter4);
     }
 
+    public static void test3() {
+        double d = Long.MAX_VALUE;  // Widening to double rounds up to 2^63.
+        System.out.println("max_long:" + (long)d);  // Must saturate to Long.MAX_VALUE.
+    }
+
 }
diff --git a/test/303-verification-stress/build b/test/303-verification-stress/build
index 2ef9bea..c1935d2 100644
--- a/test/303-verification-stress/build
+++ b/test/303-verification-stress/build
@@ -24,5 +24,5 @@
 mkdir classes
 ${JAVAC} -d classes src/*.java
 
-${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
+${DX} --debug --dex --output=classes.dex classes
 zip $TEST_NAME.jar classes.dex
diff --git a/test/run-test b/test/run-test
index 34b06cc..d1c5bb2 100755
--- a/test/run-test
+++ b/test/run-test
@@ -298,6 +298,17 @@
 
 export TEST_NAME=`basename ${test_dir}`
 
+# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
+file_size_limit=2048
+# Substring-match with `case` instead of `echo | grep`, which printed the matched path to stdout.
+case "$test_dir" in
+  *089*|*083*) file_size_limit=5120 ;;
+esac
+# Name the resource explicitly: -f is the file-size limit (the default when no option is given).
+if ! ulimit -f "$file_size_limit"; then
+   echo "ulimit file size setting failed"
+fi
+
 good="no"
 if [ "$dev_mode" = "yes" ]; then
     "./${build}" 2>&1
@@ -376,7 +387,7 @@
         echo '#################### info'
         cat "${td_info}" | sed 's/^/# /g'
         echo '#################### diffs'
-        diff --strip-trailing-cr -u "$expected" "$output"
+        diff --strip-trailing-cr -u "$expected" "$output" | tail -n 500
         echo '####################'
         echo ' '
     fi