Merge "Support new VMRuntime native methods"
diff --git a/Android.mk b/Android.mk
index 0cd9166..3324458 100644
--- a/Android.mk
+++ b/Android.mk
@@ -40,20 +40,18 @@
 
 .PHONY: clean-oat-host
 clean-oat-host:
-	rm -f $(ART_NATIVETEST_OUT)/*.odex
-	rm -f $(ART_NATIVETEST_OUT)/*.oat
-	rm -f $(ART_NATIVETEST_OUT)/*.art
-	rm -f $(ART_TEST_OUT)/*.odex
-	rm -f $(ART_TEST_OUT)/*.oat
-	rm -f $(ART_TEST_OUT)/*.art
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.art
+	rm -rf $(ART_NATIVETEST_OUT)
+	rm -rf $(ART_TEST_OUT)
+	rm -f $(HOST_CORE_IMG_OUT)
+	rm -f $(HOST_CORE_OAT_OUT)
+	rm -f $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/*.odex
+	rm -f $(TARGET_CORE_IMG_OUT)
+	rm -f $(TARGET_CORE_OAT_OUT)
+ifdef TARGET_2ND_ARCH
+	rm -f $(2ND_TARGET_CORE_IMG_OUT)
+	rm -f $(2ND_TARGET_CORE_OAT_OUT)
+endif
+	rm -rf $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.odex
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.oat
 	rm -f $(TARGET_OUT_APPS)/*.odex
@@ -69,24 +67,15 @@
 .PHONY: clean-oat-target
 clean-oat-target:
 	adb remount
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(ART_TEST_DIR)/*.odex
-	adb shell rm -f $(ART_TEST_DIR)/*.oat
-	adb shell rm -f $(ART_TEST_DIR)/*.art
-ifdef TARGET_2ND_ARCH
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.art
-endif
+	adb shell rm -rf $(ART_NATIVETEST_DIR)
+	adb shell rm -rf $(ART_TEST_DIR)
 	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.art
-	adb shell rm -f system/app/*.odex
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$(DEX2OAT_TARGET_ARCH)
+ifdef TARGET_2ND_ARCH
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+endif
 	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
 
 ifneq ($(art_dont_bother),true)
@@ -108,6 +97,9 @@
 ART_HOST_DEPENDENCIES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar
 ART_HOST_DEPENDENCIES += $(HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+ifdef TARGET_2ND_ARCH
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+endif
 
 ########################################################################
 # test targets
@@ -198,14 +190,14 @@
 define declare-test-art-host-run-test
 .PHONY: test-art-host-run-test-default-$(1)
 test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host $(1)
 	@echo test-art-host-run-test-default-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1)
 
 .PHONY: test-art-host-run-test-interpreter-$(1)
 test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host --interpreter $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host --interpreter $(1)
 	@echo test-art-host-run-test-interpreter-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1)
@@ -240,10 +232,9 @@
 endef
 $(eval $(call call-art-multi-target-rule,declare-test-art-target,test-art-target))
 
-
 define declare-test-art-target-dependencies
 .PHONY: test-art-target-dependencies$(1)
-test-art-target-dependencies$(1): $(ART_TARGET_TEST_DEPENDENCIES$(1)) $(ART_TEST_OUT)/libarttest.so
+test-art-target-dependencies$(1): $(ART_TARGET_TEST_DEPENDENCIES$(1)) $(ART_TARGET_LIBARTTEST_$(1))
 endef
 $(eval $(call call-art-multi-target-rule,declare-test-art-target-dependencies,test-art-target-dependencies))
 
@@ -278,7 +269,7 @@
 endif
 .PHONY: test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
 test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-sync $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) $$($(2)run_test_$(1)) $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) $$($(2)run_test_$(1)) $(1)
 	@echo test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) PASSED
 endef
 
@@ -379,7 +370,7 @@
 ########################################################################
 # "m art-host" for just building the files needed to run the art script
 .PHONY: art-host
-art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_OUT_JAVA_LIBRARIES)/core.art $(HOST_OUT)/lib/libjavacore.so
+art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so
 
 .PHONY: art-host-debug
 art-host-debug:   art-host $(HOST_OUT)/lib/libartd.so $(HOST_OUT)/bin/dex2oatd
@@ -403,7 +394,7 @@
 .PHONY: dump-oat-core-host
 ifeq ($(ART_BUILD_HOST),true)
 dump-oat-core-host: $(HOST_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
+	$(OATDUMP) --image=$(HOST_CORE_IMG_LOCATION) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 endif
 
@@ -467,6 +458,16 @@
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
 	adb shell start
 
+.PHONY: use-artd-full
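+# Switch the device to the debug ART runtime (libartd.so) with default compiler flags.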
+use-artd-full:
+	adb root && sleep 3
+	adb shell stop
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell setprop dalvik.vm.dex2oat-flags ""
+	adb shell setprop dalvik.vm.image-dex2oat-flags ""
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libartd.so
+	adb shell start
+
 .PHONY: use-art-smart
 use-art-smart:
 	adb root && sleep 3
diff --git a/build/Android.common.mk b/build/Android.common.mk
index cc600bd..ae54efb 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -103,7 +103,7 @@
 
 ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
 DEX2OAT_FLAGS := --compiler-backend=Optimizing
-DALVIKVM_FLAGS := -Xcompiler-option --compiler-backend=Optimizing
+DALVIKVM_FLAGS += -Xcompiler-option --compiler-backend=Optimizing
 endif
 
 #
@@ -136,19 +136,17 @@
 ART_DALVIK_CACHE_DIR := /data/dalvik-cache
 
 # directory used for gtests on device
-ART_BASE_NATIVETEST_DIR := /data/nativetest/art
-ART_BASE_NATIVETEST_OUT := $(TARGET_OUT_DATA_NATIVE_TESTS)/art
+ART_NATIVETEST_DIR := /data/nativetest/art
+ART_NATIVETEST_OUT := $(TARGET_OUT_DATA_NATIVE_TESTS)/art
 
-# directory used for tests on device
-ART_BASE_TEST_DIR := /data/art-test
-ART_BASE_TEST_OUT := $(TARGET_OUT_DATA)/art-test
+# directory used for oat tests on device
+ART_TEST_DIR := /data/art-test
+ART_TEST_OUT := $(TARGET_OUT_DATA)/art-test
 
 # Primary vs. secondary
 2ND_TARGET_ARCH := $(TARGET_2ND_ARCH)
 ART_PHONY_TEST_TARGET_SUFFIX :=
 2ND_ART_PHONY_TEST_TARGET_SUFFIX :=
-ART_TARGET_BINARY_SUFFIX :=
-2ND_ART_TARGET_BINARY_SUFFIX :=
 ifdef TARGET_2ND_ARCH
   art_test_primary_suffix :=
   art_test_secondary_suffix :=
@@ -156,27 +154,15 @@
     art_test_primary_suffix := 64
     ART_PHONY_TEST_TARGET_SUFFIX := 64
     2ND_ART_PHONY_TEST_TARGET_SUFFIX := 32
-    ART_TARGET_BINARY_SUFFIX := 64
+    ART_TARGET_ARCH_32 := $(TARGET_2ND_ARCH)
+    ART_TARGET_ARCH_64 := $(TARGET_ARCH)
   else
     # TODO: ???
     $(error Do not know what to do with this multi-target configuration!)
   endif
-  # Primary with primary suffix
-  ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)$(art_test_primary_suffix)
-  ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)$(art_test_primary_suffix)
-  ART_TEST_DIR := $(ART_BASE_TEST_DIR)$(art_test_primary_suffix)
-  ART_TEST_OUT := $(ART_BASE_TEST_OUT)$(art_test_primary_suffix)
-  # Secondary with 2ND_ prefix and secondary suffix
-  2ND_ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)$(art_test_secondary_suffix)
-  2ND_ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)$(art_test_secondary_suffix)
-  2ND_ART_TEST_DIR := $(ART_BASE_TEST_DIR)$(art_test_secondary_suffix)
-  2ND_ART_TEST_OUT := $(ART_BASE_TEST_OUT)$(art_test_secondary_suffix)
 else
-  ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)
-  ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)
-  ART_TEST_DIR := $(ART_BASE_TEST_DIR)
-  ART_TEST_OUT := $(ART_BASE_TEST_OUT)
-  # No secondary
+  ART_TARGET_ARCH_32 := $(TARGET_ARCH)
+  ART_TARGET_ARCH_64 :=
 endif
 
 ART_CPP_EXTENSION := .cc
@@ -257,9 +243,6 @@
 endif
 ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
-# TODO: remove when target no longer implies stlport.
-ART_TARGET_CFLAGS += -DART_WITH_STLPORT=1
-
 # DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is set in ../build/core/dex_preopt.mk based on
 # the TARGET_CPU_VARIANT
 ifeq ($(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES),)
@@ -400,5 +383,24 @@
   endif
 endef
 
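+# Per-instruction-set locations of the core oat and image files for host and target.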
+HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+TARGET_CORE_OAT := $(ART_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_OAT := $(ART_TEST_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+endif
+
+HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.oat
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+endif
+
+HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.art
+TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.art
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.art
+endif
+
+HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art
 
 endif # ANDROID_COMMON_MK
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 6aa1c18..3c33975 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -56,7 +56,7 @@
   LOCAL_MODULE_TAGS := optional
   LOCAL_SRC_FILES := $$(art_source)
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime $$(art_c_includes)
-  LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries) # libnativehelper
+  LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries)
 
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := $$(art_executable)
@@ -100,12 +100,11 @@
   endif
 
   ifeq ($$(art_target_or_host),target)
-    include art/build/Android.libcxx.mk
+    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   else # host
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     ART_HOST_EXECUTABLES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   endif
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index e9db47e..952f79a 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -81,6 +81,7 @@
 	compiler/optimizing/find_loops_test.cc \
 	compiler/optimizing/linearize_test.cc \
 	compiler/optimizing/liveness_test.cc \
+	compiler/optimizing/live_ranges_test.cc \
 	compiler/optimizing/pretty_printer_test.cc \
 	compiler/optimizing/ssa_test.cc \
 	compiler/output_stream_test.cc \
@@ -129,12 +130,12 @@
 # (1) Prefix for variables
 define build-art-test-make-target
 .PHONY: $$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX)
-$$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX): $($(1)ART_NATIVETEST_OUT)/$$(LOCAL_MODULE) test-art-target-sync
-	adb shell touch $($(1)ART_TEST_DIR)/$$@
-	adb shell rm $($(1)ART_TEST_DIR)/$$@
-	adb shell chmod 755 $($(1)ART_NATIVETEST_DIR)/$$(notdir $$<)
-	adb shell sh -c "$($(1)ART_NATIVETEST_DIR)/$$(notdir $$<) && touch $($(1)ART_TEST_DIR)/$$@"
-	$(hide) (adb pull $($(1)ART_TEST_DIR)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
+$$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX): $(ART_NATIVETEST_OUT)/$(TARGET_$(1)ARCH)/$$(LOCAL_MODULE) test-art-target-sync
+	adb shell touch $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@
+	adb shell rm $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@
+	adb shell chmod 755 $(ART_NATIVETEST_DIR)/$(TARGET_$(1)ARCH)/$$(notdir $$<)
+	adb shell sh -c "$(ART_NATIVETEST_DIR)/$(TARGET_$(1)ARCH)/$$(notdir $$<) && touch $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@"
+	$(hide) (adb pull $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
 	$(hide) rm /tmp/$$@
 
   ART_TARGET_GTEST_TARGETS$($(1)ART_PHONY_TEST_TARGET_SUFFIX) += $$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX)
@@ -186,20 +187,20 @@
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
     LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
     LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
-    LOCAL_STATIC_LIBRARIES += libgtest
-    LOCAL_MODULE_PATH_32 := $(ART_BASE_NATIVETEST_OUT)
-    LOCAL_MODULE_PATH_64 := $(ART_BASE_NATIVETEST_OUT)64
+    LOCAL_STATIC_LIBRARIES += libgtest_libc++
+    LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    include art/build/Android.libcxx.mk
+    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     
-    ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$$(LOCAL_MODULE)
+    ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
     art_gtest_target := test-art-$$(art_target_or_host)-gtest-$$(art_gtest_name)
 
     ifdef TARGET_2ND_ARCH
       $(call build-art-test-make-target,2ND_)
 
-      ART_TARGET_GTEST_EXECUTABLES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(2ND_ART_NATIVETEST_OUT)/$$(LOCAL_MODULE)
+      ART_TARGET_GTEST_EXECUTABLES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_2ND_ARCH)/$$(LOCAL_MODULE)
 
       # Bind the primary to the non-suffix rule
       ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
@@ -219,7 +220,6 @@
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     art_gtest_exe := $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
     ART_HOST_GTEST_EXECUTABLES += $$(art_gtest_exe)
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index 18d321a..6965326 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -20,6 +20,11 @@
 	test/StackWalk/stack_walk_jni.cc \
 	test/UnsafeTest/unsafe_test.cc
 
+ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
+ifdef TARGET_2ND_ARCH
+  ART_TARGET_LIBARTTEST_$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TEST_OUT)/$(TARGET_2ND_ARCH)/libarttest.so
+endif
+
 # $(1): target or host
 define build-libarttest
   ifneq ($(1),target)
@@ -48,10 +53,10 @@
     LOCAL_SHARED_LIBRARIES += libdl libcutils
     LOCAL_STATIC_LIBRARIES := libgtest
     LOCAL_MULTILIB := both
-    LOCAL_MODULE_PATH_32 := $(ART_BASE_TEST_OUT)
-    LOCAL_MODULE_PATH_64 := $(ART_BASE_TEST_OUT)64
+    LOCAL_MODULE_PATH_32 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
-    include art/build/Android.libcxx.mk
+    include external/libcxx/libcxx.mk
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
@@ -62,7 +67,6 @@
       LOCAL_LDLIBS += -lrt
     endif
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_SHARED_LIBRARY)
   endif
 endef
diff --git a/build/Android.libcxx.mk b/build/Android.libcxx.mk
deleted file mode 100644
index f84e957..0000000
--- a/build/Android.libcxx.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.libcxx.mk
-
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-  include external/stlport/libstlport.mk
-  LOCAL_CFLAGS += -DART_WITH_STLPORT=1
-  # include external/libcxx/libcxx.mk
-endif
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 9d7579d..bf07ecc 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -29,18 +29,6 @@
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
-HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/core.oat
-TARGET_CORE_OAT := $(ART_TEST_DIR)/core.oat
-2ND_TARGET_CORE_OAT := $(2ND_ART_TEST_DIR)/core.oat
-
-HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/core.oat
-TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/core.oat
-2ND_TARGET_CORE_OAT_OUT := $(2ND_ART_TEST_OUT)/core.oat
-
-HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/core.art
-TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/core.art
-2ND_TARGET_CORE_IMG_OUT := $(2ND_ART_TEST_OUT)/core.art
-
 TARGET_INSTRUCTION_SET_FEATURES := $(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
 # Use dex2oat debug version for better error reporting
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 8592aaa..4108ba4 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -59,8 +59,8 @@
 	dex/mir_field_info.cc \
 	dex/mir_method_info.cc \
 	dex/mir_optimization.cc \
-	dex/pass_driver.cc \
 	dex/bb_optimizations.cc \
+	dex/pass_driver_me.cc \
 	dex/bit_vector_block_iterator.cc \
 	dex/frontend.cc \
 	dex/mir_graph.cc \
@@ -170,7 +170,6 @@
   ifeq ($$(art_target_or_host),host)
     LOCAL_IS_HOST_MODULE := true
   endif
-  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
@@ -196,6 +195,7 @@
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
   ifeq ($$(art_target_or_host),target)
+    include external/libcxx/libcxx.mk
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
   else # host
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7a91e47..fb6c625 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -420,18 +420,18 @@
     image_reservation_.reset();
   }
 
-  UniquePtr<CompilerOptions> compiler_options_;
-  UniquePtr<VerificationResults> verification_results_;
-  UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
-  UniquePtr<CompilerCallbacksImpl> callbacks_;
-  UniquePtr<CompilerDriver> compiler_driver_;
-  UniquePtr<CumulativeLogger> timer_;
+  std::unique_ptr<CompilerOptions> compiler_options_;
+  std::unique_ptr<VerificationResults> verification_results_;
+  std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
+  std::unique_ptr<CompilerCallbacksImpl> callbacks_;
+  std::unique_ptr<CompilerDriver> compiler_driver_;
+  std::unique_ptr<CumulativeLogger> timer_;
 
  private:
-  UniquePtr<MemMap> image_reservation_;
+  std::unique_ptr<MemMap> image_reservation_;
 
   // Chunks must not move their storage after being created - use the node-based std::list.
-  std::list<std::vector<uint8_t> > header_code_and_maps_chunks_;
+  std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index c3c9961..23cd250 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -17,12 +17,12 @@
 #ifndef ART_COMPILER_COMPILED_METHOD_H_
 #define ART_COMPILER_COMPILED_METHOD_H_
 
+#include <memory>
 #include <string>
 #include <vector>
 
 #include "instruction_set.h"
 #include "utils.h"
-#include "UniquePtrCompat.h"
 
 namespace llvm {
   class Function;
diff --git a/compiler/compilers.h b/compiler/compilers.h
index e523d64..2c231e1 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -92,7 +92,7 @@
                              const DexFile& dex_file) const;
 
  private:
-  UniquePtr<std::ostream> visualizer_output_;
+  std::unique_ptr<std::ostream> visualizer_output_;
 
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index abfa7a7..1852f80 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,13 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool CodeLayout::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need of repeating, so just return false.
   return false;
@@ -32,13 +38,22 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool SSATransformation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need of repeating, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit* cUnit) const {
+void SSATransformation::End(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   // Verify the dataflow information after the pass.
   if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
     cUnit->mir_graph->VerifyDataflow();
@@ -48,7 +63,13 @@
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool ConstantPropagation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need of repeating, so just return false.
   return false;
@@ -57,7 +78,10 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
+bool MethodUseCount::Gate(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +91,13 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool MethodUseCount::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CountUses(bb);
   // No need of repeating, so just return false.
   return false;
@@ -76,7 +106,13 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool BBCombine::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need of repeating, so just return false.
@@ -86,7 +122,10 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit* cUnit) const {
+void BBOptimizations::Start(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   /*
  * This pass has a different ordering depending on the suppress exception,
    * so do the pass here for now:
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 6d500a5..43dcdf4 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
 
 #include "compiler_internals.h"
-#include "pass.h"
+#include "pass_me.h"
 
 namespace art {
 
@@ -26,16 +26,22 @@
  * @class CacheFieldLoweringInfo
  * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
  */
-class CacheFieldLoweringInfo : public Pass {
+class CacheFieldLoweringInfo : public PassME {
  public:
-  CacheFieldLoweringInfo() : Pass("CacheFieldLoweringInfo", kNoNodes) {
+  CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheFieldLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasFieldAccess();
   }
 };
@@ -44,16 +50,22 @@
  * @class CacheMethodLoweringInfo
  * @brief Cache the lowering info for methods called by INVOKEs.
  */
-class CacheMethodLoweringInfo : public Pass {
+class CacheMethodLoweringInfo : public PassME {
  public:
-  CacheMethodLoweringInfo() : Pass("CacheMethodLoweringInfo", kNoNodes) {
+  CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheMethodLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasInvokes();
   }
 };
@@ -62,26 +74,41 @@
  * @class CallInlining
  * @brief Perform method inlining pass.
  */
-class CallInlining : public Pass {
+class CallInlining : public PassME {
  public:
-  CallInlining() : Pass("CallInlining") {
+  CallInlining() : PassME("CallInlining") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->InlineCallsGate();
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     cUnit->mir_graph->InlineCalls(bb);
     // No need of repeating, so just return false.
     return false;
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsEnd();
   }
 };
@@ -90,48 +117,57 @@
  * @class CodeLayout
  * @brief Perform the code layout pass.
  */
-class CodeLayout : public Pass {
+class CodeLayout : public PassME {
  public:
-  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class SSATransformation
  * @brief Perform an SSA representation pass on the CompilationUnit.
  */
-class SSATransformation : public Pass {
+class SSATransformation : public PassME {
  public:
-  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitializeSSATransformation();
   }
 
-  void End(CompilationUnit* cUnit) const;
+  void End(const PassDataHolder* data) const;
 };
 
 /**
  * @class ConstantPropagation
  * @brief Perform a constant propagation pass.
  */
-class ConstantPropagation : public Pass {
+class ConstantPropagation : public PassME {
  public:
-  ConstantPropagation() : Pass("ConstantPropagation") {
+  ConstantPropagation() : PassME("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -140,12 +176,15 @@
  * @class InitRegLocations
  * @brief Initialize Register Locations.
  */
-class InitRegLocations : public Pass {
+class InitRegLocations : public PassME {
  public:
-  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
+  InitRegLocations() : PassME("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -154,53 +193,77 @@
  * @class MethodUseCount
  * @brief Count the register uses of the method
  */
-class MethodUseCount : public Pass {
+class MethodUseCount : public PassME {
  public:
-  MethodUseCount() : Pass("UseCount") {
+  MethodUseCount() : PassME("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  bool Gate(const CompilationUnit* cUnit) const;
+  bool Gate(const PassDataHolder* data) const;
 };
 
 /**
  * @class NullCheckEliminationAndTypeInference
  * @brief Null check elimination and type inference.
  */
-class NullCheckEliminationAndTypeInference : public Pass {
+class NullCheckEliminationAndTypeInference : public PassME {
  public:
   NullCheckEliminationAndTypeInference()
-    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+    : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd();
   }
 };
 
-class ClassInitCheckElimination : public Pass {
+class ClassInitCheckElimination : public PassME {
  public:
-  ClassInitCheckElimination() : Pass("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
+  ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecksGate();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecks(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateClassInitChecksEnd();
   }
 };
@@ -209,32 +272,38 @@
  * @class BBCombine
  * @brief Combine basic blocks where possible.
  */
-class BBCombine : public Pass {
+class BBCombine : public PassME {
  public:
-  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class BBOptimizations
  * @brief Any simple BasicBlock optimization can be put here.
  */
-class BBOptimizations : public Pass {
+class BBOptimizations : public PassME {
  public:
-  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
+  BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit* cUnit) const;
+  void Start(const PassDataHolder* data) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 70159ca..35d777e 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -85,8 +85,8 @@
   ArenaAllocator arena;
   ArenaStack arena_stack;  // Arenas for ScopedArenaAllocator.
 
-  UniquePtr<MIRGraph> mir_graph;   // MIR container.
-  UniquePtr<Backend> cg;           // Target-specific codegen.
+  std::unique_ptr<MIRGraph> mir_graph;   // MIR container.
+  std::unique_ptr<Backend> cg;           // Target-specific codegen.
   TimingLogger timings;
 };
 
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 3bc060b..ec2556b 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -21,7 +21,7 @@
 #include "dataflow_iterator-inl.h"
 #include "leb128.h"
 #include "mirror/object.h"
-#include "pass_driver.h"
+#include "pass_driver_me.h"
 #include "runtime.h"
 #include "base/logging.h"
 #include "base/timing_logger.h"
@@ -75,6 +75,7 @@
   // (1 << kDebugShowSummaryMemoryUsage) |
   // (1 << kDebugShowFilterStats) |
   // (1 << kDebugTimings) |
+  // (1 << kDebugCodegenDump) |
   0;
 
 CompilationUnit::CompilationUnit(ArenaPool* pool)
@@ -852,6 +853,10 @@
     }
   }
 
+  if (cu.verbose) {
+    cu.enable_debug |= (1 << kDebugCodegenDump);
+  }
+
   /*
    * TODO: rework handling of optimization and debug flags.  Should we split out
    * MIR and backend flags?  Need command-line setting as well.
@@ -877,6 +882,7 @@
   if (cu.instruction_set == kArm64) {
     // TODO(Arm64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
+    cu.enable_debug |= (1 << kDebugCodegenDump);
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
@@ -918,7 +924,7 @@
   }
 
   /* Create the pass driver and launch it */
-  PassDriver pass_driver(&cu);
+  PassDriverME pass_driver(&cu);
   pass_driver.Launch();
 
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index f714ecd..9e376ee 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -76,7 +76,8 @@
   kDebugVerifyBitcode,
   kDebugShowSummaryMemoryUsage,
   kDebugShowFilterStats,
-  kDebugTimings
+  kDebugTimings,
+  kDebugCodegenDump
 };
 
 class LLVMInfo {
@@ -101,10 +102,10 @@
     }
 
   private:
-    UniquePtr< ::llvm::LLVMContext> llvm_context_;
+    std::unique_ptr< ::llvm::LLVMContext> llvm_context_;
     ::llvm::Module* llvm_module_;  // Managed by context_.
-    UniquePtr<art::llvm::IntrinsicHelper> intrinsic_helper_;
-    UniquePtr<art::llvm::IRBuilder> ir_builder_;
+    std::unique_ptr<art::llvm::IntrinsicHelper> intrinsic_helper_;
+    std::unique_ptr<art::llvm::IRBuilder> ir_builder_;
 };
 
 class CompiledMethod;
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 7049f8c..0c2b6a7 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -17,8 +17,9 @@
 #ifndef ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
 #define ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
 
+#include <memory>
+
 #include "compiler_internals.h"
-#include "UniquePtrCompat.h"
 #include "utils/scoped_arena_allocator.h"
 #include "utils/scoped_arena_containers.h"
 
@@ -89,7 +90,7 @@
 
  public:
   static LocalValueNumbering* Create(CompilationUnit* cu) {
-    UniquePtr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
+    std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
     void* addr = allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMisc);
     return new(addr) LocalValueNumbering(cu, allocator.release());
   }
@@ -195,7 +196,7 @@
   void HandlePutObject(MIR* mir);
 
   CompilationUnit* const cu_;
-  UniquePtr<ScopedArenaAllocator> allocator_;
+  std::unique_ptr<ScopedArenaAllocator> allocator_;
   SregValueMap sreg_value_map_;
   SregValueMap sreg_wide_value_map_;
   ValueMap value_map_;
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index ebac871..e56e016 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -144,7 +144,6 @@
       mir->ssa_rep->fp_def = nullptr;  // Not used by LVN.
       mir->dalvikInsn.opcode = def->opcode;
       mir->offset = i;  // LVN uses offset only for debug output
-      mir->width = 1u;  // Not used by LVN.
       mir->optimization_flags = 0u;
 
       if (i != 0u) {
@@ -181,7 +180,7 @@
   MIR* mirs_;
   std::vector<SSARepresentation> ssa_reps_;
   std::vector<uint16_t> value_names_;
-  UniquePtr<LocalValueNumbering> lvn_;
+  std::unique_ptr<LocalValueNumbering> lvn_;
 };
 
 TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) {
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 1c9e2e2..508f1c7 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -15,6 +15,8 @@
  */
 
 #include <algorithm>
+#include <memory>
+
 #include "compiler_internals.h"
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
@@ -23,7 +25,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "UniquePtrCompat.h"
 #include "utils/scoped_arena_containers.h"
 
 namespace art {
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index ba4224e..4ba6677 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -196,7 +196,7 @@
   }
 
   orig_block->last_mir_insn = prev;
-  prev->next = NULL;
+  prev->next = nullptr;
 
   /*
    * Update the immediate predecessor block pointer so that outgoing edges
@@ -220,6 +220,7 @@
   while (p != bottom_block->last_mir_insn) {
     p = p->next;
     DCHECK(p != nullptr);
+    p->bb = bottom_block->id;
     int opcode = p->dalvikInsn.opcode;
     /*
      * Some messiness here to ensure that we only enter real opcodes and only the
@@ -543,7 +544,7 @@
   new_block->start_offset = insn->offset;
   cur_block->fall_through = new_block->id;
   new_block->predecessors->Insert(cur_block->id);
-  MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+  MIR* new_insn = NewMIR();
   *new_insn = *insn;
   insn->dalvikInsn.opcode =
       static_cast<Instruction::Code>(kMirOpCheck);
@@ -629,11 +630,10 @@
 
   /* Parse all instructions and put them into containing basic blocks */
   while (code_ptr < code_end) {
-    MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+    MIR *insn = NewMIR();
     insn->offset = current_offset_;
     insn->m_unit_index = current_method_;
     int width = ParseInsn(code_ptr, &insn->dalvikInsn);
-    insn->width = width;
     Instruction::Code opcode = insn->dalvikInsn.opcode;
     if (opcode_count_ != NULL) {
       opcode_count_[static_cast<int>(opcode)]++;
@@ -924,7 +924,7 @@
   fclose(file);
 }
 
-/* Insert an MIR instruction to the end of a basic block */
+/* Insert an MIR instruction at the end of a basic block. */
 void BasicBlock::AppendMIR(MIR* mir) {
   if (first_mir_insn == nullptr) {
     DCHECK(last_mir_insn == nullptr);
@@ -935,9 +935,11 @@
     mir->next = nullptr;
     last_mir_insn = mir;
   }
+
+  mir->bb = id;
 }
 
-/* Insert an MIR instruction to the head of a basic block */
+/* Insert an MIR instruction at the head of a basic block. */
 void BasicBlock::PrependMIR(MIR* mir) {
   if (first_mir_insn == nullptr) {
     DCHECK(last_mir_insn == nullptr);
@@ -947,17 +949,53 @@
     mir->next = first_mir_insn;
     first_mir_insn = mir;
   }
+
+  mir->bb = id;
 }
 
-/* Insert a MIR instruction after the specified MIR */
+/* Insert an MIR instruction after the specified MIR. */
 void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) {
   new_mir->next = current_mir->next;
   current_mir->next = new_mir;
 
   if (last_mir_insn == current_mir) {
-    /* Is the last MIR in the block */
+    /* Is the last MIR in the block? */
     last_mir_insn = new_mir;
   }
+
+  new_mir->bb = id;
+}
+
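+/* Find the MIR immediately preceding the given one in this block, or nullptr if there is none. */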
+MIR* BasicBlock::FindPreviousMIR(MIR* mir) {
+  MIR* current = first_mir_insn;
+
+  while (current != nullptr) {
+    MIR* next = current->next;
+
+    if (next == mir) {
+      return current;
+    }
+
+    current = next;
+  }
+
+  return nullptr;
+}
+
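+/* Insert an MIR instruction before the specified MIR. */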
+void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) {
+  if (first_mir_insn == current_mir) {
+    /* Is the first MIR in the block? */
+    first_mir_insn = new_mir;
+    new_mir->next = current_mir;
+    new_mir->bb = id;
+    return;
+  }
+
+  MIR* prev = FindPreviousMIR(current_mir);
+
+  if (prev != nullptr) {
+    prev->next = new_mir;
+    new_mir->next = current_mir;
+    new_mir->bb = id;
+  }
 }
 
 MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
@@ -1240,6 +1278,12 @@
   return info;
 }
 
+// Allocate a new MIR.
+MIR* MIRGraph::NewMIR() {
+  MIR* mir = new (arena_) MIR();
+  return mir;
+}
+
 // Allocate a new basic block.
 BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) {
   BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock),
@@ -1344,4 +1388,106 @@
   return nullptr;
 }
 
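+/* Remove an MIR instruction from this basic block; returns true if it was found and unlinked. */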
+bool BasicBlock::RemoveMIR(MIR* mir) {
+  if (mir == nullptr) {
+    return false;
+  }
+
+  // Find the MIR, and the one before it if they exist.
+  MIR* current = nullptr;
+  MIR* prev = nullptr;
+
+  // Walk the list, tracking the previous instruction.
+  for (current = first_mir_insn; current != nullptr; prev = current, current = current->next) {
+    if (current == mir) {
+      break;
+    }
+  }
+
+  // Did we find it?
+  if (current != nullptr) {
+    MIR* next = current->next;
+
+    // Unlink current by making prev skip over it.
+    if (prev != nullptr) {
+      prev->next = next;
+    }
+
+    // Fix up the block's first/last pointers if current was at either end.
+    if (first_mir_insn == current) {
+      first_mir_insn = next;
+    }
+
+    if (last_mir_insn == current) {
+      last_mir_insn = prev;
+    }
+
+    // Found it and removed it.
+    return true;
+  }
+
+  // We did not find it.
+  return false;
+}
+
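+/* Make an arena-allocated copy of this MIR; the next, bb, and ssa_rep links are cleared. */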
+MIR* MIR::Copy(MIRGraph* mir_graph) {
+  MIR* res = mir_graph->NewMIR();
+  *res = *this;
+
+  // Remove links
+  res->next = nullptr;
+  res->bb = NullBasicBlockId;
+  res->ssa_rep = nullptr;
+
+  return res;
+}
+
+MIR* MIR::Copy(CompilationUnit* c_unit) {
+  return Copy(c_unit->mir_graph.get());
+}
+
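+/* Get the starting use index for the given opcode; *PUT opcodes skip the stored value. */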
+uint32_t SSARepresentation::GetStartUseIndex(Instruction::Code opcode) {
+  // Default result: start at the first use.
+  uint32_t res = 0;
+
+  // For the *PUT opcodes, skip the VR(s) holding the value being stored.
+  switch (opcode) {
+    case Instruction::IPUT:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::APUT:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT:
+    case Instruction::SPUT:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT:
+      // Skip the VR containing what to store.
+      res = 1;
+      break;
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::APUT_WIDE:
+    case Instruction::SPUT_WIDE:
+      // Skip the two VRs containing what to store.
+      res = 2;
+      break;
+    default:
+      // Do nothing in the general case.
+      break;
+  }
+
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 85a2d04..0bb8265 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -242,6 +242,8 @@
   bool* fp_use;
   int32_t* defs;
   bool* fp_def;
+
+  static uint32_t GetStartUseIndex(Instruction::Code opcode);
 };
 
 /*
@@ -261,12 +263,15 @@
     uint32_t vC;
     uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
     Instruction::Code opcode;
+
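+    // Default to a NOP with vA/vB/vC zeroed.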
+    explicit DecodedInstruction() : vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) {
+    }
   } dalvikInsn;
 
-  uint16_t width;                 // Note: width can include switch table or fill array data.
   NarrowDexOffset offset;         // Offset of the instruction in code units.
   uint16_t optimization_flags;
   int16_t m_unit_index;           // From which method was this MIR included
+  BasicBlockId bb;
   MIR* next;
   SSARepresentation* ssa_rep;
   union {
@@ -285,6 +290,23 @@
     // INVOKE data index, points to MIRGraph::method_lowering_infos_.
     uint32_t method_lowering_info;
   } meta;
+
+  explicit MIR() : offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId),
+                   next(nullptr), ssa_rep(nullptr) {
+    memset(&meta, 0, sizeof(meta));
+  }
+
+  uint32_t GetStartUseIndex() const {
+    return SSARepresentation::GetStartUseIndex(dalvikInsn.opcode);
+  }
+
+  MIR* Copy(CompilationUnit* c_unit);
+  MIR* Copy(MIRGraph* mir_graph);
+
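+  // MIRs are allocated from the MIRGraph's arena, so operator delete has nothing to free.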
+  static void* operator new(size_t size, ArenaAllocator* arena) {
+    return arena->Alloc(sizeof(MIR), kArenaAllocMIR);
+  }
+  static void operator delete(void* p) {}  // Nop.
 };
 
 struct SuccessorBlockInfo;
@@ -319,6 +341,8 @@
   void AppendMIR(MIR* mir);
   void PrependMIR(MIR* mir);
   void InsertMIRAfter(MIR* current_mir, MIR* new_mir);
+  void InsertMIRBefore(MIR* current_mir, MIR* new_mir);
+  MIR* FindPreviousMIR(MIR* mir);
 
   /**
    * @brief Used to obtain the next MIR that follows unconditionally.
@@ -329,6 +353,7 @@
    * @return Returns the following MIR if one can be found.
    */
   MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
+  bool RemoveMIR(MIR* mir);
 };
 
 /*
@@ -836,6 +861,7 @@
   void DumpMIRGraph();
   CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range);
   BasicBlock* NewMemBB(BBType block_type, int block_id);
+  MIR* NewMIR();
   MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir);
   BasicBlock* NextDominatedBlock(BasicBlock* bb);
   bool LayoutBlocks(BasicBlock* bb);
@@ -902,7 +928,11 @@
   static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst];
   static const uint32_t analysis_attributes_[kMirOpLast];
 
- private:
+  void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
+  bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
+  void ComputeDFSOrders();
+
+ protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
                          const ArenaBitVector* src2);
@@ -928,17 +958,14 @@
                               const uint16_t* code_end);
   int AddNewSReg(int v_reg);
   void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
-  void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   void DataFlowSSAFormat35C(MIR* mir);
   void DataFlowSSAFormat3RC(MIR* mir);
   bool FindLocalLiveIn(BasicBlock* bb);
-  bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
   bool VerifyPredInfo(BasicBlock* bb);
   BasicBlock* NeedsVisit(BasicBlock* bb);
   BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb);
   void MarkPreOrder(BasicBlock* bb);
   void RecordDFSOrders(BasicBlock* bb);
-  void ComputeDFSOrders();
   void ComputeDefBlockMatrix();
   void ComputeDomPostOrderTraversal(BasicBlock* bb);
   void ComputeDominators();
@@ -979,7 +1006,7 @@
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
   ArenaBitVector* temp_dalvik_register_v_;
-  UniquePtr<ScopedArenaAllocator> temp_scoped_alloc_;
+  std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
   uint16_t* temp_insn_data_;
   uint32_t temp_bit_vector_size_;
   ArenaBitVector* temp_bit_vector_;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 8e8a593..749a235 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -311,7 +311,7 @@
     return true;
   }
   bool use_lvn = bb->use_lvn;
-  UniquePtr<LocalValueNumbering> local_valnum;
+  std::unique_ptr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
     local_valnum.reset(LocalValueNumbering::Create(cu_));
   }
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 891d9fb..86092b6 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -170,7 +170,6 @@
       }
       mir->ssa_rep = nullptr;
       mir->offset = 2 * i;  // All insns need to be at least 2 code units long.
-      mir->width = 2u;
       mir->optimization_flags = 0u;
       merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode);
     }
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index 9457d5b..ac22294 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -19,49 +19,21 @@
 
 #include <string>
 
+#include "base/macros.h"
 namespace art {
 
-// Forward declarations.
-struct BasicBlock;
-struct CompilationUnit;
-class Pass;
-
-/**
- * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
- * @details Each enum should be a power of 2 to be correctly used.
- */
-enum OptimizationFlag {
-};
-
-enum DataFlowAnalysisMode {
-  kAllNodes = 0,                           /**< @brief All nodes. */
-  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
-  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
-  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
-  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
-  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
-  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
-  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+// Empty pass data holder; passes extending the base Pass class can subclass it to carry their data.
+class PassDataHolder {
 };
 
 /**
  * @class Pass
- * @brief Pass is the Pass structure for the optimizations.
- * @details The following structure has the different optimization passes that we are going to do.
+ * @brief Base Pass class; subclasses refine how the work call is performed.
  */
 class Pass {
  public:
-  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
-                unsigned int flags = 0u, const char* dump = "")
-    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
-    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, const char* dump)
-    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name)
+    : pass_name_(name) {
   }
 
   virtual ~Pass() {
@@ -71,59 +43,42 @@
     return pass_name_;
   }
 
-  virtual DataFlowAnalysisMode GetTraversal() const {
-    return traversal_type_;
-  }
-
-  virtual bool GetFlag(OptimizationFlag flag) const {
-    return (flags_ & flag);
-  }
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
-  }
-
   /**
-   * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit
+   * @brief Gate for the pass: determines whether to execute the pass or not, given the data holder.
-   * @param c_unit the CompilationUnit.
-   * @return whether or not to execute the pass
+   * @param data the PassDataHolder.
+   * @return whether or not to execute the pass.
    */
-  virtual bool Gate(const CompilationUnit* c_unit) const {
+  virtual bool Gate(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
 
     // Base class says yes.
     return true;
   }
 
   /**
-   * @brief Start of the pass: called before the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief Start of the pass: called before the Worker function.
    */
-  virtual void Start(CompilationUnit* c_unit) const {
+  virtual void Start(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief End of the pass: called after the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief End of the pass: called after the Worker function.
    */
-  virtual void End(CompilationUnit* c_unit) const {
+  virtual void End(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief Actually walk the BasicBlocks following a particular traversal type.
-   * @param c_unit the CompilationUnit.
-   * @param bb the BasicBlock.
+   * @brief Worker function for the pass: performs the actual per-BasicBlock work.
+   * @param data the object containing data necessary for the pass.
    * @return whether or not there is a change when walking the BasicBlock.
    */
-  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
-    // Unused parameters.
-    UNUSED(c_unit);
-    UNUSED(bb);
+  virtual bool Worker(const PassDataHolder* data) const {
+    // Unused parameter.
+    UNUSED(data);
 
     // BasicBlock did not change.
     return false;
@@ -133,15 +88,6 @@
   /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */
   const char* const pass_name_;
 
-  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
-  const DataFlowAnalysisMode traversal_type_;
-
-  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
-  const unsigned int flags_;
-
-  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
-  const char* const dump_cfg_folder_;
-
  private:
   // In order to make the all passes not copy-friendly.
   DISALLOW_COPY_AND_ASSIGN(Pass);
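With this split, Pass no longer sees CompilationUnit at all: drivers hand each callback an opaque PassDataHolder and concrete passes downcast it. A condensed, self-contained sketch of the intended extension pattern (simplified stand-ins, not the actual art:: headers):

    class PassDataHolder {};

    class Pass {
     public:
      explicit Pass(const char* name) : pass_name_(name) {}
      virtual ~Pass() {}
      const char* GetName() const { return pass_name_; }
      virtual bool Gate(const PassDataHolder*) const { return true; }
      virtual bool Worker(const PassDataHolder*) const { return false; }
     protected:
      const char* const pass_name_;
    };

    // A driver-specific holder carries whatever its passes need to see.
    class CountHolder : public PassDataHolder {
     public:
      int visited_blocks = 0;
    };

    class CountingGate : public Pass {
     public:
      CountingGate() : Pass("CountingGate") {}
      bool Gate(const PassDataHolder* data) const {
        // Safe by construction: this pass only runs under a driver
        // that hands out CountHolder instances.
        const CountHolder* holder = static_cast<const CountHolder*>(data);
        return holder->visited_blocks == 0;  // run once per compilation
      }
    };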
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index 2b7196e..aa0d1ae 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -22,77 +22,169 @@
 #include "safe_map.h"
 
 // Forward Declarations.
-class CompilationUnit;
 class Pass;
-
+class PassDriver;
 namespace art {
+/**
+ * @brief Helper function to create a single instance of a given Pass that can be shared
+ * across threads.
+ */
+template <typename PassType>
+const Pass* GetPassInstance() {
+  static const PassType pass;
+  return &pass;
+}
+
+// Empty base data holder for PassDriver implementations.
+class PassDriverDataHolder {
+};
 
 /**
  * @class PassDriver
- * @brief PassDriver is the wrapper around all Pass instances in order to execute them from the Middle-End
+ * @brief PassDriver is the wrapper around all Pass instances in order to execute them.
  */
+template <typename PassDriverType>
 class PassDriver {
  public:
-  explicit PassDriver(CompilationUnit* cu, bool create_default_passes = true);
+  explicit PassDriver() {
+    InitializePasses();
+  }
 
-  ~PassDriver();
+  virtual ~PassDriver() {
+  }
 
   /**
-   * @brief Insert a Pass: can warn if multiple passes have the same name.
+   * @brief Insert a Pass: it is an error to insert multiple passes with the same name.
-   * @param new_pass the new Pass to insert in the map and list.
-   * @param warn_override warn if the name of the Pass is already used.
    */
-  void InsertPass(const Pass* new_pass);
+  void InsertPass(const Pass* new_pass) {
+    DCHECK(new_pass != nullptr);
+    DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
+
+    // It is an error to override an existing pass.
+    DCHECK(GetPass(new_pass->GetName()) == nullptr)
+        << "Pass name " << new_pass->GetName() << " already used.";
+
+    // Now add to the list.
+    pass_list_.push_back(new_pass);
+  }
 
   /**
    * @brief Run a pass using the name as key.
-   * @param c_unit the considered CompilationUnit.
-   * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
+  virtual bool RunPass(const char* pass_name) {
+    // Paranoid: we need a non-empty pass name.
+    DCHECK(pass_name != nullptr && pass_name[0] != 0);
+
+    const Pass* cur_pass = GetPass(pass_name);
+
+    if (cur_pass != nullptr) {
+      return RunPass(cur_pass);
+    }
+
+    // Return false, we did not find the pass.
+    return false;
+  }
+
+  /**
+   * @brief Runs all the passes in pass_list_.
+   */
+  void Launch() {
+    for (const Pass* cur_pass : pass_list_) {
+      RunPass(cur_pass);
+    }
+  }
+
+  /**
+   * @brief Searches for a particular pass.
+   * @param name the name of the pass to be searched for.
+   */
+  const Pass* GetPass(const char* name) const {
+    for (const Pass* cur_pass : pass_list_) {
+      if (strcmp(name, cur_pass->GetName()) == 0) {
+        return cur_pass;
+      }
+    }
+    return nullptr;
+  }
+
+  static void CreateDefaultPassList(const std::string& disable_passes) {
+    // Insert each pass from g_passes into g_default_pass_list.
+    PassDriverType::g_default_pass_list.clear();
+    PassDriverType::g_default_pass_list.reserve(PassDriver<PassDriverType>::g_passes_size);
+    for (uint16_t i = 0; i < PassDriver<PassDriverType>::g_passes_size; ++i) {
+      const Pass* pass = PassDriver<PassDriverType>::g_passes[i];
+      // Check if we should disable this pass.
+      if (disable_passes.find(pass->GetName()) != std::string::npos) {
+        LOG(INFO) << "Skipping " << pass->GetName();
+      } else {
+        PassDriver<PassDriverType>::g_default_pass_list.push_back(pass);
+      }
+    }
+  }
 
   /**
    * @brief Run a pass using the Pass itself.
    * @param time_split do we want a time split request(default: false)?
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split = false);
+  virtual bool RunPass(const Pass* pass, bool time_split = false) = 0;
 
-  void Launch();
+  /**
+   * @brief Print the pass names of all the passes available.
+   */
+  static void PrintPassNames() {
+    LOG(INFO) << "Loop Passes are:";
 
-  void HandlePassFlag(CompilationUnit* c_unit, const Pass* pass);
+    for (const Pass* cur_pass : PassDriver<PassDriverType>::g_default_pass_list) {
+      LOG(INFO) << "\t-" << cur_pass->GetName();
+    }
+  }
+
+ protected:
+  /**
+   * @brief Gets the list of passes currently scheduled to execute.
+   * @return pass_list_
+   */
+  std::vector<const Pass*>& GetPasses() {
+    return pass_list_;
+  }
+
+  virtual void InitializePasses() {
+    SetDefaultPasses();
+  }
+
+  void SetDefaultPasses() {
+    pass_list_ = PassDriver<PassDriverType>::g_default_pass_list;
+  }
 
   /**
    * @brief Apply a pass: perform start/work/end functions.
    */
-  void ApplyPass(CompilationUnit* c_unit, const Pass* pass);
-
-  /**
-   * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode
-   */
-  void DispatchPass(CompilationUnit* c_unit, const Pass* pass);
-
-  static void PrintPassNames();
-  static void CreateDefaultPassList(const std::string& disable_passes);
-
-  const Pass* GetPass(const char* name) const;
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
+  virtual void ApplyPass(PassDataHolder* data, const Pass* pass) {
+    pass->Start(data);
+    DispatchPass(pass);
+    pass->End(data);
   }
-
- protected:
-  void CreatePasses();
+  /**
+   * @brief Dispatch a pass.
+   * Gives the ability to add logic when running the pass.
+   */
+  virtual void DispatchPass(const Pass* pass) {
+    UNUSED(pass);
+  }
 
   /** @brief List of passes: provides the order to execute the passes. */
   std::vector<const Pass*> pass_list_;
 
-  /** @brief The CompilationUnit on which to execute the passes on. */
-  CompilationUnit* const cu_;
+  /** @brief The number of passes within g_passes.  */
+  static const uint16_t g_passes_size;
 
-  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
-  const char* dump_cfg_folder_;
+  /** @brief The initial list of passes; immutable and shared by all instances of the driver. */
+  static const Pass* const g_passes[];
+
+  /** @brief The default pass list is used to initialize pass_list_. */
+  static std::vector<const Pass*> g_default_pass_list;
 };
 
 }  // namespace art
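The CRTP shape (PassDriver<PassDriverType>) exists so each concrete driver can supply its own static pass tables through explicit member specialization, as pass_driver_me.cc does below. A toy model of that mechanism, with stand-in types rather than the real ones:

    #include <vector>

    struct Pass { };

    template <typename DriverType>
    class PassDriver {
     protected:
      static const Pass* const g_passes[];    // defined per concrete driver
      static const unsigned g_passes_size;
      static std::vector<const Pass*> g_default_pass_list;
    };

    class ToyDriver : public PassDriver<ToyDriver> { };

    static const Pass pass_a{};
    static const Pass pass_b{};

    // Explicit specializations: one immutable table per driver type, shared
    // by every instance (and every thread) that uses this driver.
    template <> const Pass* const PassDriver<ToyDriver>::g_passes[] = { &pass_a, &pass_b };
    template <> const unsigned PassDriver<ToyDriver>::g_passes_size = 2;
    template <> std::vector<const Pass*> PassDriver<ToyDriver>::g_default_pass_list(
        PassDriver<ToyDriver>::g_passes, PassDriver<ToyDriver>::g_passes + 2);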
diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc
new file mode 100644
index 0000000..d054500
--- /dev/null
+++ b/compiler/dex/pass_driver_me.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/macros.h"
+#include "bb_optimizations.h"
+#include "compiler_internals.h"
+#include "dataflow_iterator.h"
+#include "dataflow_iterator-inl.h"
+#include "pass_driver_me.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+  bool change = false;
+  for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) {
+    data->bb = bb;
+    change = pass->Worker(data);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) {
+  DCHECK(data != nullptr);
+  CompilationUnit* c_unit = data->c_unit;
+  DCHECK(c_unit != nullptr);
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(data, pass, &iterator);
+}
+}  // anonymous namespace
+
+/*
+ * Create the pass list. These passes are immutable and are shared across the threads.
+ *
+ * Advantage is that there will be no race conditions here.
+ * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
+ *   - This is not yet an issue: no current pass would require it.
+ */
+// The initial list of passes to be used by the PassDriverME.
+template<>
+const Pass* const PassDriver<PassDriverME>::g_passes[] = {
+  GetPassInstance<CacheFieldLoweringInfo>(),
+  GetPassInstance<CacheMethodLoweringInfo>(),
+  GetPassInstance<CallInlining>(),
+  GetPassInstance<CodeLayout>(),
+  GetPassInstance<SSATransformation>(),
+  GetPassInstance<ConstantPropagation>(),
+  GetPassInstance<InitRegLocations>(),
+  GetPassInstance<MethodUseCount>(),
+  GetPassInstance<NullCheckEliminationAndTypeInference>(),
+  GetPassInstance<ClassInitCheckElimination>(),
+  GetPassInstance<BBCombine>(),
+  GetPassInstance<BBOptimizations>(),
+};
+
+// The number of the passes in the initial list of Passes (g_passes).
+template<>
+uint16_t const PassDriver<PassDriverME>::g_passes_size = arraysize(PassDriver<PassDriverME>::g_passes);
+
+// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_.
+template<>
+std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(PassDriver<PassDriverME>::g_passes, PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size);
+
+PassDriverME::PassDriverME(CompilationUnit* cu)
+    : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") {
+  pass_me_data_holder_.bb = nullptr;
+  pass_me_data_holder_.c_unit = cu;
+}
+
+PassDriverME::~PassDriverME() {
+}
+
+void PassDriverME::DispatchPass(const Pass* pass) {
+  VLOG(compiler) << "Dispatching " << pass->GetName();
+  const PassME* me_pass = down_cast<const PassME*>(pass);
+
+  DataFlowAnalysisMode mode = me_pass->GetTraversal();
+
+  switch (mode) {
+    case kPreOrderDFSTraversal:
+      DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPreOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kPostOrderDOMTraversal:
+      DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kAllNodes:
+      DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kNoNodes:
+      break;
+    default:
+      LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode;
+      break;
+  }
+}
+
+bool PassDriverME::RunPass(const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
+  CompilationUnit* c_unit = pass_me_data_holder_.c_unit;
+  DCHECK(c_unit != nullptr);
+
+  // Do we perform a time split?
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
+  }
+
+  // Check the pass gate first.
+  bool should_apply_pass = pass->Gate(&pass_me_data_holder_);
+  if (should_apply_pass) {
+    // Applying the pass: first start, doWork, and end calls.
+    ApplyPass(&pass_me_data_holder_, pass);
+
+    // Do we want to log it?
+    if ((c_unit->enable_debug&  (1 << kDebugDumpCFG)) != 0) {
+      // Do we have a pass folder?
+      const PassME* me_pass = down_cast<const PassME*>(pass);
+      const char* pass_folder = me_pass->GetDumpCFGFolder();
+      DCHECK(pass_folder != nullptr);
+
+      if (pass_folder[0] != 0) {
+        // Create directory prefix.
+        std::string prefix = GetDumpCFGFolder();
+        prefix += pass_folder;
+        prefix += "/";
+
+        c_unit->mir_graph->DumpCFG(prefix.c_str(), false);
+      }
+    }
+  }
+
+  // If the pass gate passed, we can declare success.
+  return should_apply_pass;
+}
+
+const char* PassDriverME::GetDumpCFGFolder() const {
+  return dump_cfg_folder_;
+}
+
+}  // namespace art
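Taken together, the expected call shape for the middle-end is small; a hedged sketch of a driver invocation (the actual call site lives in frontend code not shown in this diff):

    void RunMiddleEndPasses(CompilationUnit* cu) {
      PassDriverME driver(cu);  // binds cu into pass_me_data_holder_
      // Launch() walks pass_list_ in order; each RunPass checks Gate(),
      // then ApplyPass() does Start / DispatchPass (BasicBlock walk) / End.
      driver.Launch();
    }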
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
new file mode 100644
index 0000000..0142934
--- /dev/null
+++ b/compiler/dex/pass_driver_me.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+
+#include "bb_optimizations.h"
+#include "pass_driver.h"
+#include "pass_me.h"
+
+namespace art {
+
+class PassDriverME : public PassDriver<PassDriverME> {
+ public:
+  explicit PassDriverME(CompilationUnit* cu);
+  ~PassDriverME();
+  /**
+   * @brief Dispatch a pass: walk the BasicBlocks depending on the traversal mode.
+   */
+  void DispatchPass(const Pass* pass);
+  bool RunPass(const Pass* pass, bool time_split = false);
+  const char* GetDumpCFGFolder() const;
+ protected:
+  /** @brief The data holder that contains data needed for the PassDriverME. */
+  PassMEDataHolder pass_me_data_holder_;
+
+  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
+  const char* dump_cfg_folder_;
+};
+
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_H_
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
new file mode 100644
index 0000000..1132166
--- /dev/null
+++ b/compiler/dex/pass_me.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_ME_H_
+#define ART_COMPILER_DEX_PASS_ME_H_
+
+#include <string>
+#include "pass.h"
+
+namespace art {
+
+// Forward declarations.
+struct BasicBlock;
+struct CompilationUnit;
+class Pass;
+
+/**
+ * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
+ * @details Each enum should be a power of 2 to be correctly used.
+ */
+enum OptimizationFlag {
+};
+
+// Data holder class.
+class PassMEDataHolder : public PassDataHolder {
+ public:
+  CompilationUnit* c_unit;
+  BasicBlock* bb;
+};
+
+enum DataFlowAnalysisMode {
+  kAllNodes = 0,                           /**< @brief All nodes. */
+  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
+  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
+  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
+  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
+  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
+  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+};
+
+/**
+ * @class PassME
+ * @brief PassME is the Pass structure for the middle-end optimizations.
+ * @details Derived passes supply a traversal mode, optional flags and a CFG dump folder.
+ */
+class PassME : public Pass {
+ public:
+  explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes,
+                  unsigned int flags = 0u, const char* dump = "")
+    : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, const char* dump)
+    : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  ~PassME() {
+  }
+
+  virtual DataFlowAnalysisMode GetTraversal() const {
+    return traversal_type_;
+  }
+
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
+
+  bool GetFlag(OptimizationFlag flag) const {
+    return (flags_ & flag);
+  }
+
+ protected:
+  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
+  const DataFlowAnalysisMode traversal_type_;
+
+  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
+  const unsigned int flags_;
+
+  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
+  const char* const dump_cfg_folder_;
+};
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_ME_H_
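A concrete middle-end pass then derives from PassME, picking a traversal mode and an optional dump folder. A hypothetical example (the name, folder and body are invented; only the constructor arguments and the down_cast pattern come from this change, and down_cast assumes base/casts.h):

    class ExamplePass : public PassME {
     public:
      ExamplePass() : PassME("Example", kRepeatingPreOrderDFSTraversal, "example_cfg") {
      }

      bool Worker(const PassDataHolder* data) const {
        const PassMEDataHolder* me_data = down_cast<const PassMEDataHolder*>(data);
        BasicBlock* bb = me_data->bb;  // set by the driver before each call
        UNUSED(bb);
        // Return true only when this block changed, so a repeating
        // iterator schedules another round over the CFG.
        return false;
      }
    };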
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 1ee59c6..2d1c19e 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -54,8 +54,6 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -68,7 +66,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2d4834c..384a008 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -284,10 +284,10 @@
     ccode = FlipComparisonOrder(ccode);
   }
   if (rl_src2.is_const) {
-    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWide(rl_src2);
     // Do special compare/branch against simple const operand if not already in registers.
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    if ((rl_temp.location != kLocPhysReg) &&
+    if ((rl_src2.location != kLocPhysReg) &&
         ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
       GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
       return;
@@ -1092,6 +1092,8 @@
         DCHECK(!res_hi.Valid());
         DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
         DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
+        // FreeTemp below will release src1_hi, so its contents must be clobbered first.
+        Clobber(rl_src1.reg);
         FreeTemp(rl_src1.reg.GetHigh());
         res_hi = AllocTemp();
       }
@@ -1103,9 +1105,7 @@
               tmp1.GetReg());
       NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
       if (reg_status == 2) {
-        // Clobber rl_src1 since it was corrupted.
-        FreeTemp(rl_src1.reg);
-        Clobber(rl_src1.reg);
+        FreeTemp(rl_src1.reg.GetLow());
       }
     }
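The reordering above reflects an allocator invariant (stated here as an assumption about the Mir2Lir register model, which this diff only hints at): Clobber() invalidates whatever value the tracker still associates with a register, while FreeTemp() merely returns it to the pool, so a corrupted register should be clobbered before it is freed:

    Clobber(rl_src1.reg);              // contents are about to be invalid
    FreeTemp(rl_src1.reg.GetHigh());   // safe to recycle: no stale value survives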
 
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 8cf1f86..1520c52 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -46,6 +46,7 @@
 static const RegStorage dp_temps_arr[] =
     {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
 
+static const std::vector<RegStorage> empty_pool;
 static const std::vector<RegStorage> core_regs(core_regs_arr,
     core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
 static const std::vector<RegStorage> sp_regs(sp_regs_arr,
@@ -554,26 +555,11 @@
   return new ArmMir2Lir(cu, mir_graph, arena);
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage ArmMir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  } else {
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    return RegStorage::MakeRegPair(low_reg, high_reg);
-  }
-}
-
-RegStorage ArmMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg))
-    return AllocTempSingle();
-  return AllocTemp();
-}
-
 void ArmMir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
-                                        core_temps, sp_temps, dp_temps);
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs,
+                                        dp_regs, reserved_regs, empty_pool /* reserved64 */,
+                                        core_temps, empty_pool /* core64_temps */, sp_temps,
+                                        dp_temps);
 
   // Target-specific adjustments.
 
@@ -609,18 +595,6 @@
   reg_pool_->next_dp_reg_ = 0;
 }
 
-void ArmMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
-}
-
 /*
  * TUNING: is true leaf?  Can't just use METHOD_IS_LEAF to determine as some
  * instructions might call out to C/assembly helper functions.  Until
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index b0211d6..86d32f4 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -853,7 +853,7 @@
         load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
                        encoded_disp);
       }
-      if ((displacement & ~1020) != 0 && !r_dest.IsFloat()) {
+      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
         FreeTemp(r_ptr);
       }
       already_generated = true;
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index f98e366..c3b23fd 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -95,13 +95,8 @@
  * +========================+
  */
 
-// Offset to distinguish FP regs.
-#define ARM_FP_REG_OFFSET 32
 // First FP callee save.
-#define ARM_FP_CALLEE_SAVE_BASE 16
-
-// Mask to strip off fp flags.
-#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1)
+#define A64_FP_CALLEE_SAVE_BASE 8
 
 // Temporary macros, used to mark code which wants to distinguish between zr/sp.
 #define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
@@ -134,30 +129,32 @@
   R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
 
 // Registers (integer) values.
-// TODO(Arm64): for now we define rx##nr identically to rw##nr. We should rather define rx##nr as
-// a k64BitSolo. We should do this once the register allocator is ready.
 enum A64NativeRegisterPool {
 #  define A64_DEFINE_REGISTERS(nr) \
     rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
-    rx##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rx##nr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | nr, \
     rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
     rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
   A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
 #undef A64_DEFINE_REGISTERS
 
-  // TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp?
-  //   This would be desirable to allow detecting usage-errors in the assembler.
-  rwzr = rw31,
-  rxzr = rx31,
+  rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
+  rxzr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0x3f,
   rwsp = rw31,
   rsp = rx31,
-  // TODO: rx4 is an argument register in C ABI which is not a good idea,
-  // But we need to decide to use caller save register in C ABI or callee save register.
-  // Because it will result to different implementation in the trampoline.
-  rA64_SUSPEND = rx4,
+  rA64_SUSPEND = rx19,
   rA64_SELF = rx18,
   rA64_SP = rx31,
-  rA64_LR = rx30
+  rA64_LR = rx30,
+  /*
+   * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
+   * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
+   * allocated as 32-bit temp registers.
+   */
+  rA32_SUSPEND = rw19,
+  rA32_SELF = rw18,
+  rA32_SP = rw31,
+  rA32_LR = rw30
 };
 
 #define A64_DEFINE_REGSTORAGES(nr) \
@@ -174,6 +171,11 @@
 constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
 constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
 constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
+// TODO: eliminate the need for these.
+constexpr RegStorage rs_rA32_SUSPEND(RegStorage::kValid | rA32_SUSPEND);
+constexpr RegStorage rs_rA32_SELF(RegStorage::kValid | rA32_SELF);
+constexpr RegStorage rs_rA32_SP(RegStorage::kValid | rA32_SP);
+constexpr RegStorage rs_rA32_LR(RegStorage::kValid | rA32_LR);
 
 // RegisterLocation templates return values (following the hard-float calling convention).
 const RegLocation arm_loc_c_return =
@@ -233,9 +235,11 @@
   kA64B1t,           // b   [00010100] offset_26[25-0].
   kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
   kA64Cbz2rt,        // cbz [00110100] imm_19[23-5] rt[4-0].
-  kA64Cmn3Rro,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmn3rro,       // cmn [s0101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+  kA64Cmn3Rre,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
   kA64Cmn3RdT,       // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
-  kA64Cmp3Rro,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmp3rro,       // cmp [s1101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+  kA64Cmp3Rre,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
   kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
   kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
   kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
@@ -279,6 +283,7 @@
   kA64Ldr4fXxG,      // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
   kA64Ldr4rXxG,      // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
   kA64LdrPost3rXd,   // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldp4ffXD,      // ldp [0s10110101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64Ldp4rrXD,      // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64LdpPost4rrXD,  // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
@@ -306,7 +311,8 @@
   kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
   kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
   kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
-  kA64Stp4rrXD,      // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Stp4ffXD,      // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Stp4rrXD,      // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64StpPre4rrXD,   // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64Str3fXD,       // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
@@ -355,9 +361,6 @@
 #define FUNWIDE UNWIDE
 #define IS_FWIDE IS_WIDE
 
-#define OP_KIND_UNWIDE(opcode) (opcode)
-#define OP_KIND_IS_WIDE(opcode) (false)
-
 enum ArmOpDmbOptions {
   kSY = 0xf,
   kST = 0xe,
@@ -390,6 +393,9 @@
   kFmtSkip,      // Unused field, but continue to next.
 };
 
+// TODO(Arm64): should we get rid of kFmtExtend?
+//   Note: the only instructions that use it (cmp, cmn) are themselves currently unused.
+
 // Struct used to define the snippet positions for each A64 opcode.
 struct ArmEncodingMap {
   uint32_t wskeleton;
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 93caf89..656f8fd 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -168,18 +168,26 @@
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | IS_BRANCH  | NEEDS_FIXUP,
                  "cbz", "!0r, !1t", kFixupCBxZ),
-    ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x6b20001f),
-                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+    ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0R, !1r!2o", kFixupNone),
+                 "cmn", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmn3Rre), SF_VARIANTS(0x2b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0R, !1r!2e", kFixupNone),
     ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
                  kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
                  "cmn", "!0R, #!1d!2T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b00001f),
-                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+    ENCODING_MAP(WIDE(kA64Cmp3rro), SF_VARIANTS(0x6b00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0R, !1r!2o", kFixupNone),
+                 "cmp", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3Rre), SF_VARIANTS(0x6b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0R, !1r!2e", kFixupNone),
     ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
                  kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
@@ -354,9 +362,13 @@
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
                  "ldr", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
+                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
                  "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -462,9 +474,13 @@
                  kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
                  kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123,
                  "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
+                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
                  "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -650,7 +666,7 @@
                   expected = "core register";
                 } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) {
                   expected = (want_64_bit) ? "x-register" : "w-register";
-                } else if (reg.GetRegNum() == 31 && is_zero == want_zero) {
+                } else if (reg.GetRegNum() == 31 && is_zero != want_zero) {
                   expected = (want_zero) ? "zero-register" : "sp-register";
                 }
               }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 136a04f..f7a0199 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -25,7 +25,10 @@
 
 bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                   const InlineMethod& special) {
-  return Mir2Lir::GenSpecialCase(bb, mir, special);
+  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
+  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
+  // return Mir2Lir::GenSpecialCase(bb, mir, special);
+  return false;
 }
 
 /*
@@ -348,18 +351,16 @@
     OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
   }
 
-  /* Spill core callee saves */
-  if (core_spill_mask_) {
-    SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
-  }
   /* Need to spill any FP regs? */
-  if (num_fp_spills_) {
-    /*
-     * NOTE: fp spills are a little different from core spills in that
-     * they are pushed as a contiguous block.  When promoting from
-     * the fp set, we must allocate all singles from s16..highest-promoted
-     */
-    // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, core_spill_mask_);
+  if (fp_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
+  }
+
+  /* Spill core callee saves. */
+  if (core_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -379,12 +380,15 @@
   LockTemp(rs_x1);
 
   NewLIR0(kPseudoMethodExit);
+
   /* Need to restore any FP callee saves? */
-  if (num_fp_spills_) {
-    // TODO(Arm64): UnspillFPRegs(num_fp_spills_);
+  if (fp_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
   }
   if (core_spill_mask_) {
-    UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+    int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
   }
 
   OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
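With the rewritten prologue/epilogue, the FP spill block sits immediately below the core spill block at the top of the frame. A worked example, assuming kArm64PointerSize is 8 bytes:

    frame_size_ = 96, num_core_spills_ = 4, num_fp_spills_ = 2:
      fp   spill_offset = 96 - 8 * (2 + 4) = 48   ->  d-regs in [SP+48, SP+64)
      core spill_offset = 96 - 8 * 4       = 64   ->  x-regs in [SP+64, SP+96)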
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 418a989..350e483 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -54,8 +54,6 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -68,12 +66,10 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
-    RegStorage AllocPreservedDouble(int s_reg);
 
     // Required for target - miscellaneous.
     void AssembleLIR();
@@ -157,6 +153,8 @@
     uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
     void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
     void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
 
     // Required for target - single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
@@ -195,7 +193,7 @@
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
                           bool is_wide = false);
-    LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false);
+    LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
     int EncodeShift(int code, int amount);
     int EncodeExtend(int extend_type, int amount);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index f2a57e7..b0f5904 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -697,11 +697,19 @@
   GenDivZeroCheck(kCondEq);
 }
 
-// TODO(Arm64): the function below should go.
 // Test suspend flag, return target of taken suspend branch
 LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
+  // TODO(Arm64): re-enable suspend checks, once art_quick_test_suspend is implemented and
+  //   the suspend register is properly handled in the trampolines.
+#if 0
   NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+  // TODO(Arm64): Fake suspend check. Will always fail to branch. Remove this.
+  LIR* branch = NewLIR2((target == NULL) ? kA64Cbnz2rt : kA64Cbz2rt, rwzr, 0);
+  branch->target = target;
+  return branch;
+#endif
 }
 
 // Decrement register and branch on condition
@@ -1199,34 +1207,61 @@
 
 void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
   int reg1 = -1, reg2 = -1;
-  const int pop_log2_size = 3;
+  const int reg_log2_size = 3;
 
-  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
      reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
     if (UNLIKELY(reg2 < 0)) {
-      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
-      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     } else {
-      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
-      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(),
-              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     }
   }
 }
 
 void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
   int reg1 = -1, reg2 = -1;
-  const int pop_log2_size = 3;
+  const int reg_log2_size = 3;
 
-  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
     if (UNLIKELY(reg2 < 0)) {
-      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
-      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     } else {
-      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
-      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(),
-              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+void Arm64Mir2Lir::UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+// TODO(Arm64): consider using ld1 and st1?
+void Arm64Mir2Lir::SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
     }
   }
 }
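The rewritten spill loops work in 8-byte slots: the byte offset is converted once with offset >> reg_log2_size, and each iteration advances by two slots because every ldp/stp moves a register pair. A toy trace, assuming GenPairWise returns up to two set bits of the mask per call:

    byte offset 32, mask covering x19..x22, reg_log2_size = 3:
      slot = 32 >> 3 = 4
      pair 1: stp of two x-regs at [base, #4]   // stp immediate in 8-byte units -> byte 32
      pair 2: stp of two x-regs at [base, #6]   // byte 48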
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 10be0d6..2b1c5e8 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -27,10 +27,17 @@
 
 // TODO: rework this when c++11 support allows.
 static const RegStorage core_regs_arr[] =
+    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
+     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15,
+     rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23,
+     rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31,
+     rs_wzr};
+static const RegStorage core64_regs_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
      rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
-     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31};
+     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31,
+     rs_xzr};
 static const RegStorage sp_regs_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
      rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
@@ -42,12 +49,18 @@
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
 static const RegStorage reserved_regs_arr[] =
-    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
-// TUING: Are there too many temp registers and too less promote target?
+    {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr};
+static const RegStorage reserved64_regs_arr[] =
+    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr};
+// TUNING: Are there too many temp registers and too few promotion targets?
 // This definition needs to match runtime.cc, the quick entry assembly and the JNI compiler.
 // Note: we cannot call a C function directly if its register usage does not match the C ABI.
 // Currently, rs_rA64_SELF is not a callee-save register, which does not match the C ABI.
 static const RegStorage core_temps_arr[] =
+    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
+     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
+     rs_w17};
+static const RegStorage core64_temps_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
      rs_x17};
@@ -62,14 +75,20 @@
 
 static const std::vector<RegStorage> core_regs(core_regs_arr,
     core_regs_arr + arraysize(core_regs_arr));
+static const std::vector<RegStorage> core64_regs(core64_regs_arr,
+    core64_regs_arr + arraysize(core64_regs_arr));
 static const std::vector<RegStorage> sp_regs(sp_regs_arr,
     sp_regs_arr + arraysize(sp_regs_arr));
 static const std::vector<RegStorage> dp_regs(dp_regs_arr,
     dp_regs_arr + arraysize(dp_regs_arr));
 static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
     reserved_regs_arr + arraysize(reserved_regs_arr));
+static const std::vector<RegStorage> reserved64_regs(reserved64_regs_arr,
+    reserved64_regs_arr + arraysize(reserved64_regs_arr));
 static const std::vector<RegStorage> core_temps(core_temps_arr,
     core_temps_arr + arraysize(core_temps_arr));
+static const std::vector<RegStorage> core64_temps(core64_temps_arr,
+    core64_temps_arr + arraysize(core64_temps_arr));
 static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
 static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
 
@@ -377,14 +396,14 @@
              strcpy(tbuf, name);
              break;
            case 's':
-             snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "s%d", operand & RegStorage::kRegNumMask);
              break;
            case 'S':
-             snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "d%d", operand & RegStorage::kRegNumMask);
              break;
            case 'f':
              snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
-                      operand & ARM_FP_REG_MASK);
+                      operand & RegStorage::kRegNumMask);
              break;
            case 'l': {
                bool is_wide = IS_WIDE(lir->opcode);
@@ -463,7 +482,7 @@
              break;
            case 'R': {
                bool is_wide = IS_WIDE(lir->opcode);
-               if (LIKELY(operand != rwsp || operand != rsp)) {
+               if (LIKELY(operand != rwsp && operand != rsp)) {
                  snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
                           operand & RegStorage::kRegNumMask);
                } else {
@@ -577,35 +596,17 @@
   return new Arm64Mir2Lir(cu, mir_graph, arena);
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage Arm64Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  } else {
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    return RegStorage::MakeRegPair(low_reg, high_reg);
-  }
-}
-
-RegStorage Arm64Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg))
-    return AllocTempSingle();
-  return AllocTemp();
-}
-
 void Arm64Mir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
-                                        core_temps, sp_temps, dp_temps);
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, core64_regs, sp_regs, dp_regs,
+                                        reserved_regs, reserved64_regs, core_temps, core64_temps,
+                                        sp_temps, dp_temps);
 
   // Target-specific adjustments.
-
-  // Alias single precision floats to appropriate half of overlapping double.
-  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
-  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    int sp_reg_num = info->GetReg().GetRegNum();
-    int dp_reg_num = sp_reg_num >> 1;
-    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+  // Alias single precision float registers to corresponding double registers.
+  GrowableArray<RegisterInfo*>::Iterator fp_it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) {
+    int fp_reg_num = info->GetReg().GetRegNum();
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | fp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
     DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
@@ -613,10 +614,6 @@
     info->SetMaster(dp_reg_info);
     // Singles should show a single 32-bit mask bit, at first referring to the low half.
     DCHECK_EQ(info->StorageMask(), 0x1U);
-    if (sp_reg_num & 1) {
-      // For odd singles, change to user the high word of the backing double.
-      info->SetStorageMask(0x2);
-    }
   }
 
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
@@ -632,10 +629,6 @@
   reg_pool_->next_dp_reg_ = 0;
 }
 
-void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64";
-}
-
 /*
  * TUNING: is true leaf?  Can't just use METHOD_IS_LEAF to determine as some
  * instructions might call out to C/assembly helper functions.  Until
@@ -648,14 +641,11 @@
 }
 
 /*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
+ * Mark a callee-save fp register as promoted.
  */
 void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
-  int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
+  DCHECK(reg.IsFloat());
+  int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
   // Ensure fp_vmap_table is large enough
   int table_size = fp_vmap_table_.size();
   for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
@@ -665,29 +655,36 @@
   fp_vmap_table_[adjusted_reg_num] = v_reg;
   // Size of fp_vmap_table is high-water mark, use to set mask
   num_fp_spills_ = fp_vmap_table_.size();
-  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
+  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
 }
 
 void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  // TEMP: perform as 2 singles.
-  int reg_num = reg.GetRegNum() << 1;
-  RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
-  RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
-  MarkPreservedSingle(v_reg, lo);
-  MarkPreservedSingle(v_reg + 1, hi);
+  DCHECK(reg.IsDouble());
+  MarkPreservedSingle(v_reg, reg);
 }
 
 /* Clobber all regs that might be used by an external C call */
 void Arm64Mir2Lir::ClobberCallerSave() {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(WARNING);
-
   Clobber(rs_x0);
   Clobber(rs_x1);
   Clobber(rs_x2);
   Clobber(rs_x3);
+  Clobber(rs_x4);
+  Clobber(rs_x5);
+  Clobber(rs_x6);
+  Clobber(rs_x7);
+  Clobber(rs_x8);
+  Clobber(rs_x9);
+  Clobber(rs_x10);
+  Clobber(rs_x11);
   Clobber(rs_x12);
+  Clobber(rs_x13);
+  Clobber(rs_x14);
+  Clobber(rs_x15);
+  Clobber(rs_x16);
+  Clobber(rs_x17);
   Clobber(rs_x30);
+
   Clobber(rs_f0);
   Clobber(rs_f1);
   Clobber(rs_f2);
@@ -696,14 +693,22 @@
   Clobber(rs_f5);
   Clobber(rs_f6);
   Clobber(rs_f7);
-  Clobber(rs_f8);
-  Clobber(rs_f9);
-  Clobber(rs_f10);
-  Clobber(rs_f11);
-  Clobber(rs_f12);
-  Clobber(rs_f13);
-  Clobber(rs_f14);
-  Clobber(rs_f15);
+  Clobber(rs_f16);
+  Clobber(rs_f17);
+  Clobber(rs_f18);
+  Clobber(rs_f19);
+  Clobber(rs_f20);
+  Clobber(rs_f21);
+  Clobber(rs_f22);
+  Clobber(rs_f23);
+  Clobber(rs_f24);
+  Clobber(rs_f25);
+  Clobber(rs_f26);
+  Clobber(rs_f27);
+  Clobber(rs_f28);
+  Clobber(rs_f29);
+  Clobber(rs_f30);
+  Clobber(rs_f31);
 }
 
 RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
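
A quick sanity check on the spill-mask arithmetic above, as a standalone sketch (MakeFpSpillMask is a hypothetical helper; A64_FP_CALLEE_SAVE_BASE is assumed to be 8, i.e. d8 is the first callee-save double):

    #include <cassert>
    #include <cstdint>

    // Mirrors fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE.
    uint32_t MakeFpSpillMask(int num_fp_spills, int callee_save_base) {
      // (1 << n) - 1 yields n contiguous low bits; shift them up to the base.
      return ((1u << num_fp_spills) - 1u) << callee_save_base;
    }

    int main() {
      assert(MakeFpSpillMask(3, 8) == 0x700u);  // three promoted regs: d8, d9, d10
      return 0;
    }
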
@@ -776,61 +781,6 @@
   return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
 }
 
-/*
- * Somewhat messy code here.  We want to allocate a pair of contiguous
- * physical single-precision floating point registers starting with
- * an even numbered reg.  It is possible that the paired s_reg (s_reg+1)
- * has already been allocated - try to fit if possible.  Fail to
- * allocate if we can't meet the requirements for the pair of
- * s_reg<=sX[even] & (s_reg+1)<= sX+1.
- */
-// TODO: needs rewrite to support non-backed 64-bit float regs.
-RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) {
-  RegStorage res;
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int p_map_idx = SRegToPMap(s_reg);
-  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
-    // Upper reg is already allocated.  Can we fit?
-    int high_reg = promotion_map_[p_map_idx+1].FpReg;
-    if ((high_reg & 1) == 0) {
-      // High reg is even - fail.
-      return res;  // Invalid.
-    }
-    // Is the low reg of the pair free?
-    // FIXME: rework.
-    RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
-    if (p->InUse() || p->IsTemp()) {
-      // Already allocated or not preserved - fail.
-      return res;  // Invalid.
-    }
-    // OK - good to go.
-    res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
-    p->MarkInUse();
-    MarkPreservedSingle(v_reg, p->GetReg());
-  } else {
-    /*
-     * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
-     * different underlying physical registers.
-     */
-    GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
-    for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-      if (!info->IsTemp() && !info->InUse()) {
-        res = info->GetReg();
-        info->MarkInUse();
-        MarkPreservedDouble(v_reg, info->GetReg());
-        break;
-      }
-    }
-  }
-  if (res.Valid()) {
-    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg();
-    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg();
-  }
-  return res;
-}
-
 // TODO(Arm64): reuse info in QuickArgumentVisitor?
 static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
                                     OpSize* op_size) {
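
The deleted AllocPreservedDouble existed because of ARM32-style aliasing, where dN overlays s(2N) and s(2N+1) and a promoted double therefore needs an even-numbered single as its low half; on ARM64 doubles are no longer backed by single pairs, so the constraint vanishes. For reference, the old rule in one hypothetical helper:

    // Under s/d aliasing, d4 overlays s8/s9 - never s9/s10 - so the low
    // single of a promoted pair must be even-numbered.
    bool CanBackDoubleWithSingles(int low_single_num) {
      return (low_single_num & 1) == 0;
    }
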
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 77e4c3c..39e9fad 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -360,18 +360,17 @@
   return NewLIR1(opcode, r_dest_src.GetReg());
 }
 
-LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2,
-                                 int shift, bool is_wide) {
-  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
   ArmOpcode opcode = kA64Brk1d;
 
-  switch (OP_KIND_UNWIDE(op)) {
+  switch (op) {
     case kOpCmn:
-      opcode = kA64Cmn3Rro;
+      opcode = kA64Cmn3rro;
       break;
     case kOpCmp:
-      // TODO(Arm64): check the instruction above: "cmp w0, w1" is rendered as "cmp w0, w1, uxtb".
-      opcode = kA64Cmp3Rro;
+      opcode = kA64Cmp3rro;
       break;
     case kOpMov:
       opcode = kA64Mov2rr;
@@ -388,39 +387,38 @@
     case kOpRev:
       DCHECK_EQ(shift, 0);
       // Binary, but rm is encoded twice.
-      return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2);
+      return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
       break;
     case kOpRevsh:
       // Binary, but rm is encoded twice.
-      return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2);
+      return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
       break;
     case kOp2Byte:
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
       // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
       // For now we use sbfm directly.
-      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7);
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
     case kOp2Short:
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
       // For now we use sbfm rather than its alias, sbfx.
-      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
     case kOp2Char:
       // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
       // For now we use ubfm directly.
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
     default:
-      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
+      return OpRegRegRegShift(op, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
   }
 
   DCHECK(!IsPseudoLirOp(opcode));
   if (EncodingMap[opcode].flags & IS_BINARY_OP) {
     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-    return NewLIR2(opcode | wide, r_dest_src1, r_src2);
+    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
   } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
     ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
-    if (kind == kFmtExtend || kind == kFmtShift) {
-      DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift));
-      return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift);
+    if (kind == kFmtShift) {
+      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
     }
   }
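
The sbfm/ubfm immediates hard-coded above (0/7, 0/15) follow the standard bitfield-extract alias the comments cite. A minimal sketch of that mapping (names hypothetical):

    // sbfx/ubfx rd, rn, #lsb, #width  ==  sbfm/ubfm rd, rn, #lsb, #(lsb + width - 1)
    struct BfmImms { int immr; int imms; };

    BfmImms BfxToBfm(int lsb, int width) {
      return { lsb, lsb + width - 1 };  // kOp2Byte: lsb 0, width 8 -> (0, 7)
    }
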
 
@@ -429,8 +427,7 @@
 }
 
 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT,
-                       r_dest_src1.Is64Bit());
+  return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
 }
 
 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -452,7 +449,7 @@
                                     int r_src2, int shift, bool is_wide) {
   ArmOpcode opcode = kA64Brk1d;
 
-  switch (OP_KIND_UNWIDE(op)) {
+  switch (op) {
     case kOpAdd:
       opcode = kA64Add4rrro;
       break;
@@ -525,10 +522,10 @@
   ArmOpcode opcode = kA64Brk1d;
   ArmOpcode alt_opcode = kA64Brk1d;
   int32_t log_imm = -1;
-  bool is_wide = OP_KIND_IS_WIDE(op);
+  bool is_wide = r_dest.Is64Bit();
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
 
-  switch (OP_KIND_UNWIDE(op)) {
+  switch (op) {
     case kOpLsl: {
       // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
       // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
@@ -639,7 +636,7 @@
     return res;
   }
 
-  switch (OP_KIND_UNWIDE(op)) {
+  switch (op) {
     case kOpAdd:
       neg_opcode = kA64Sub4RRdT;
       opcode = kA64Add4RRdT;
@@ -828,99 +825,66 @@
                                     OpSize size) {
   LIR* load = NULL;
   ArmOpcode opcode = kA64Brk1d;
-  bool short_form = false;
-  int encoded_disp = displacement;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int scale = 0;
+
   switch (size) {
     case kDouble:     // Intentional fall-through.
     case kWord:       // Intentional fall-through.
     case k64:
-      DCHECK_EQ(encoded_disp & 0x3, 0);
+      scale = 3;
       if (r_dest.IsFloat()) {
-        // Currently double values may be misaligned.
-        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
-          // Can use scaled load.
-          opcode = FWIDE(kA64Ldr3fXD);
-          encoded_disp >>= 3;
-          short_form = true;
-        } else if (IS_SIGNED_IMM9(displacement)) {
-          // Can use unscaled load.
-          opcode = FWIDE(kA64Ldur3fXd);
-          short_form = true;
-        } else {
-          short_form = false;
-        }
+        DCHECK(r_dest.IsDouble());
+        opcode = FWIDE(kA64Ldr3fXD);
+        alt_opcode = FWIDE(kA64Ldur3fXd);
       } else {
-        // Currently long values may be misaligned.
-        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
-          // Can use scaled store.
-          opcode = FWIDE(kA64Ldr3rXD);
-          encoded_disp >>= 3;
-          short_form = true;
-        } else if (IS_SIGNED_IMM9(displacement)) {
-          // Can use unscaled store.
-          opcode = FWIDE(kA64Ldur3rXd);
-          short_form = true;
-        }  // else: use long sequence (short_form = false).
+        opcode = FWIDE(kA64Ldr3rXD);
+        alt_opcode = FWIDE(kA64Ldur3rXd);
       }
       break;
     case kSingle:     // Intentional fall-through.
     case k32:         // Intentional fall-through.
     case kReference:
+      scale = 2;
       if (r_dest.IsFloat()) {
+        DCHECK(r_dest.IsSingle());
         opcode = kA64Ldr3fXD;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
-        break;
-      }
-      if (displacement <= 16380 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
+      } else {
         opcode = kA64Ldr3rXD;
       }
       break;
     case kUnsignedHalf:
-      if (displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kA64Ldrh3wXF;
-      } else if (displacement < 4092 && displacement >= 0) {
-        short_form = true;
-        opcode = kA64Ldrh3wXF;
-      }
+      scale = 1;
+      opcode = kA64Ldrh3wXF;
       break;
     case kSignedHalf:
-      short_form = true;
+      scale = 1;
       opcode = kA64Ldrsh3rXF;
       break;
     case kUnsignedByte:
-      short_form = true;
       opcode = kA64Ldrb3wXd;
       break;
     case kSignedByte:
-      short_form = true;
       opcode = kA64Ldrsb3rXd;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
 
-  if (short_form) {
-    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+  int scaled_disp = displacement >> scale;
+  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+    // Can use scaled load.
+    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
+  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+    // Can use unscaled load.
+    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
   } else {
-    RegStorage reg_offset = AllocTemp();
-    LoadConstant(reg_offset, encoded_disp);
-    if (r_dest.IsFloat()) {
-      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-      OpRegReg(kOpAdd, reg_offset, r_base);
-      load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
-    } else {
-      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
-    }
-    FreeTemp(reg_offset);
+    // Use long sequence.
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, displacement);
+    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
+    FreeTemp(r_scratch);
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
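
The rewritten selection above reduces to a three-way choice per access size: a scaled unsigned 12-bit offset (LDR/STR), a signed unscaled 9-bit offset (LDUR/STUR), or a long sequence through a temp. A compact standalone restatement (enum and helper are hypothetical):

    enum class DispKind { kScaled, kUnscaled, kLongSequence };

    DispKind ClassifyDisp(int displacement, int scale, bool has_unscaled_form) {
      bool aligned = (displacement & ((1 << scale) - 1)) == 0;
      int scaled = displacement >> scale;
      if (aligned && scaled >= 0 && scaled < 4096) {
        return DispKind::kScaled;        // LDR/STR unsigned, scaled imm12
      }
      if (has_unscaled_form && displacement >= -256 && displacement < 256) {
        return DispKind::kUnscaled;      // LDUR/STUR signed imm9
      }
      return DispKind::kLongSequence;    // materialize the offset in a temp
    }
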
@@ -947,92 +911,64 @@
                                      OpSize size) {
   LIR* store = NULL;
   ArmOpcode opcode = kA64Brk1d;
-  bool short_form = false;
-  int encoded_disp = displacement;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int scale = 0;
+
   switch (size) {
     case kDouble:     // Intentional fall-through.
     case kWord:       // Intentional fall-through.
     case k64:
-      DCHECK_EQ(encoded_disp & 0x3, 0);
+      scale = 3;
       if (r_src.IsFloat()) {
-        // Currently double values may be misaligned.
-        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
-          // Can use scaled store.
-          opcode = FWIDE(kA64Str3fXD);
-          encoded_disp >>= 3;
-          short_form = true;
-        } else if (IS_SIGNED_IMM9(displacement)) {
-          // Can use unscaled store.
-          opcode = FWIDE(kA64Stur3fXd);
-          short_form = true;
-        }  // else: use long sequence (short_form = false).
+        DCHECK(r_src.IsDouble());
+        opcode = FWIDE(kA64Str3fXD);
+        alt_opcode = FWIDE(kA64Stur3fXd);
       } else {
-        // Currently long values may be misaligned.
-        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
-          // Can use scaled store.
-          opcode = FWIDE(kA64Str3rXD);
-          encoded_disp >>= 3;
-          short_form = true;
-        } else if (IS_SIGNED_IMM9(displacement)) {
-          // Can use unscaled store.
-          opcode = FWIDE(kA64Stur3rXd);
-          short_form = true;
-        }  // else: use long sequence (short_form = false).
+        opcode = FWIDE(kA64Str3rXD);
+        alt_opcode = FWIDE(kA64Stur3rXd);
       }
       break;
     case kSingle:     // Intentional fall-through.
     case k32:         // Intentional fall-through.
     case kReference:
+      scale = 2;
       if (r_src.IsFloat()) {
         DCHECK(r_src.IsSingle());
-        DCHECK_EQ(encoded_disp & 0x3, 0);
         opcode = kA64Str3fXD;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
-        break;
-      }
-
-      if (displacement <= 16380 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
+      } else {
         opcode = kA64Str3rXD;
       }
       break;
     case kUnsignedHalf:
     case kSignedHalf:
-      DCHECK_EQ((displacement & 0x1), 0);
-      short_form = true;
-      encoded_disp >>= 1;
+      scale = 1;
       opcode = kA64Strh3wXF;
       break;
     case kUnsignedByte:
     case kSignedByte:
-      short_form = true;
       opcode = kA64Strb3wXd;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
 
-  if (short_form) {
-    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+  int scaled_disp = displacement >> scale;
+  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+    // Can use scaled store.
+    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
+  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+    // Can use unscaled store.
+    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
   } else {
+    // Use long sequence.
     RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, encoded_disp);
-    if (r_src.IsFloat()) {
-      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-      OpRegReg(kOpAdd, r_scratch, r_base);
-      store = StoreBaseDispBody(r_scratch, 0, r_src, size);
-    } else {
-      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
-    }
+    LoadConstant(r_scratch, displacement);
+    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
     FreeTemp(r_scratch);
   }
 
-  // TODO: In future, may need to differentiate Dalvik & spill accesses
+  // TODO: In future, may need to differentiate Dalvik & spill accesses.
   if (r_base == rs_rA64_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6f81238..256135d 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -364,6 +364,18 @@
   return NULL;
 }
 
+/* Search the existing constants in the literal pool for an exact method match */
+LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
+  while (data_target) {
+    if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
+        UnwrapPointer(data_target->operands[1]) == method.dex_file) {
+      return data_target;
+    }
+    data_target = data_target->next;
+  }
+  return nullptr;
+}
+
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
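
The key point of ScanLiteralPoolMethod is that a method literal is identified by the (dex_method_index, dex_file) pair, never by the index alone and never by invoke type. The same scan with plain stand-in types:

    #include <cstdint>

    struct Node {
      uint32_t method_index;
      const void* dex_file;  // stands in for the unwrapped DexFile pointer
      Node* next;
    };

    Node* FindMethodLiteral(Node* head, uint32_t method_index, const void* dex_file) {
      for (Node* n = head; n != nullptr; n = n->next) {
        if (n->method_index == method_index && n->dex_file == dex_file) {
          return n;
        }
      }
      return nullptr;  // caller appends a fresh literal, as AddWordData does
    }
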
@@ -991,7 +1003,7 @@
     /* Convert LIR into machine code. */
     AssembleLIR();
 
-    if (cu_->verbose) {
+    if ((cu_->enable_debug & (1 << kDebugCodegenDump)) != 0) {
       CodegenDump();
     }
   }
@@ -1026,7 +1038,7 @@
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
   }
 
-  UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
+  std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnCallFrameInformation());
   CompiledMethod* result =
       new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
@@ -1143,11 +1155,13 @@
 
 void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType type,
                               SpecialTargetRegister symbolic_reg) {
-  int target_method_idx = target_method.dex_method_index;
-  LIR* data_target = ScanLiteralPool(code_literal_list_, target_method_idx, 0);
+  LIR* data_target = ScanLiteralPoolMethod(code_literal_list_, target_method);
   if (data_target == NULL) {
-    data_target = AddWordData(&code_literal_list_, target_method_idx);
+    data_target = AddWordData(&code_literal_list_, target_method.dex_method_index);
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
+    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
+    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
+    // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
@@ -1157,11 +1171,13 @@
 
 void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                 SpecialTargetRegister symbolic_reg) {
-  int target_method_idx = target_method.dex_method_index;
-  LIR* data_target = ScanLiteralPool(method_literal_list_, target_method_idx, 0);
+  LIR* data_target = ScanLiteralPoolMethod(method_literal_list_, target_method);
   if (data_target == NULL) {
-    data_target = AddWordData(&method_literal_list_, target_method_idx);
+    data_target = AddWordData(&method_literal_list_, target_method.dex_method_index);
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
+    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
+    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
+    // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 3ec31ba..526c981 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -35,15 +35,9 @@
 namespace {  // anonymous namespace
 
 MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) {
-  ArenaAllocator* arena = mir_graph->GetArena();
-  MIR* insn = static_cast<MIR*>(arena->Alloc(sizeof(MIR), kArenaAllocMIR));
+  MIR* insn = mir_graph->NewMIR();
   insn->offset = invoke->offset;
-  insn->width = invoke->width;
   insn->optimization_flags = MIR_CALLEE;
-  if (move_return != nullptr) {
-    DCHECK_EQ(move_return->offset, invoke->offset + invoke->width);
-    insn->width += move_return->width;
-  }
   return insn;
 }
 
@@ -660,7 +654,6 @@
   }
 
   MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result);
-  insn->width += insn->offset - invoke->offset;
   insn->offset = invoke->offset;
   insn->dalvikInsn.opcode = opcode;
   insn->dalvikInsn.vA = move_result->dalvikInsn.vA;
@@ -737,9 +730,7 @@
 
   if (move_result != nullptr) {
     MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result);
-    insn->width = invoke->width;
     move->offset = move_result->offset;
-    move->width = move_result->width;
     if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) {
       move->dalvikInsn.opcode = Instruction::MOVE_FROM16;
     } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 350823d..de55a05 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2082,6 +2082,15 @@
   }
 }
 
+void Mir2Lir::GenConst(RegLocation rl_dest, int value) {
+  RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
+  LoadConstantNoClobber(rl_result.reg, value);
+  StoreValue(rl_dest, rl_result);
+  if (value == 0) {
+    Workaround7250540(rl_dest, rl_result.reg);
+  }
+}
+
 template <size_t pointer_size>
 void Mir2Lir::GenConversionCall(ThreadOffset<pointer_size> func_offset,
                                 RegLocation rl_dest, RegLocation rl_src) {
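
GenConst is reused below (see mir_to_lir.cc) for CONST, CONST_4, CONST_16 and CONST_HIGH16; the HIGH16 form differs only in pre-shifting vB into the top half-word. A trivial standalone check of that shift (hypothetical helper):

    #include <cassert>
    #include <cstdint>

    int32_t High16ToConst(uint32_t vB) {
      return static_cast<int32_t>(vB << 16);  // payload lands in bits 16-31
    }

    int main() {
      assert(High16ToConst(0x1234u) == 0x12340000);
      return 0;
    }
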
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 24ed4a3..721b345 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1030,9 +1030,9 @@
         bytes_to_move = sizeof(uint32_t) * 4;
 
         // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.
+        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
+        // there are no free registers.
         RegStorage temp = AllocTempDouble();
-        DCHECK(temp.Valid());
 
         LIR* ld1 = nullptr;
         LIR* ld2 = nullptr;
@@ -1368,6 +1368,7 @@
   OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
+  FreeTemp(sign_reg);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
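
For context, the surrounding inlined abs(long) uses the sign-mask identity: with sign = x >> 63, abs(x) = (x + sign) ^ sign, which the add/adc/xor sequence applies across the register pair. A scalar sketch (assumes arithmetic right shift of signed values, as on the targets in question):

    #include <cassert>
    #include <cstdint>

    int64_t AbsViaSign(int64_t x) {
      int64_t sign = x >> 63;    // 0 for non-negative, -1 for negative
      return (x + sign) ^ sign;  // subtracts 1 then complements when negative
    }

    int main() {
      assert(AbsViaSign(-5) == 5);
      assert(AbsViaSign(7) == 7);
      return 0;
    }
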
@@ -1701,6 +1702,7 @@
 
   const MirMethodLoweringInfo& method_info = mir_graph_->GetMethodLoweringInfo(info->mir);
   cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags());
+  BeginInvoke(info);
   InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType());
   info->type = static_cast<InvokeType>(method_info.GetSharpType());
   bool fast_path = method_info.FastPath();
@@ -1765,6 +1767,7 @@
       }
     }
   }
+  EndInvoke(info);
   MarkSafepointPC(call_inst);
 
   ClobberCallerSave();
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8fcb09b..f5e7e63 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -145,10 +145,11 @@
       // Wrong register class, realloc, copy and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
       OpRegCopy(new_reg, rl_src.reg);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_reg)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      // Clobber the old reg.
       Clobber(rl_src.reg);
+      // ...and mark the new one live.
       rl_src.reg = new_reg;
+      MarkLive(rl_src);
     }
     return rl_src;
   }
@@ -222,10 +223,11 @@
       // Wrong register class, realloc, copy and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
       OpRegCopyWide(new_regs, rl_src.reg);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_regs)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      // Clobber the old regs.
       Clobber(rl_src.reg);
+      // ...and mark the new ones live.
       rl_src.reg = new_regs;
+      MarkLive(rl_src);
     }
     return rl_src;
   }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index c5b40da..2b57b35 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -54,8 +54,6 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -68,7 +66,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 35345e8..55cf434 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -46,6 +46,7 @@
 static RegStorage dp_temps_arr[] =
     {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
 
+static const std::vector<RegStorage> empty_pool;
 static const std::vector<RegStorage> core_regs(core_regs_arr,
     core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
 static const std::vector<RegStorage> sp_regs(sp_regs_arr,
@@ -442,27 +443,11 @@
 #endif
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage MipsMir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  }
-
-  RegStorage low_reg = AllocTemp();
-  RegStorage high_reg = AllocTemp();
-  return RegStorage::MakeRegPair(low_reg, high_reg);
-}
-
-RegStorage MipsMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempSingle();
-  }
-  return AllocTemp();
-}
-
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
-                                        core_temps, sp_temps, dp_temps);
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs,
+                                        dp_regs, reserved_regs, empty_pool /* reserved64 */,
+                                        core_temps, empty_pool /* core64_temps */, sp_temps,
+                                        dp_temps);
 
   // Target-specific adjustments.
 
@@ -492,17 +477,6 @@
   reg_pool_->next_dp_reg_ = 1;
 }
 
-void MipsMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
-}
 /*
 * In the Arm code it is typical to use the link register
  * to hold the target address.  However, for Mips we must
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 2973e14..2f37520 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -26,10 +26,9 @@
 /* Mark a temp register as dead.  Does not affect allocation state. */
 inline void Mir2Lir::ClobberBody(RegisterInfo* p) {
   DCHECK(p->IsTemp());
-  if (!p->IsDead()) {
+  if (p->SReg() != INVALID_SREG) {
     DCHECK(!(p->IsLive() && p->IsDirty()))  << "Live & dirty temp in clobber";
     p->MarkDead();
-    p->ResetDefBody();
     if (p->IsWide()) {
       p->SetIsWide(false);
       if (p->GetReg() != p->Partner()) {
@@ -37,7 +36,6 @@
         p = GetRegInfo(p->Partner());
         p->SetIsWide(false);
         p->MarkDead();
-        p->ResetDefBody();
       }
     }
   }
@@ -151,7 +149,7 @@
 /*
  * Set up the proper fields in the resource mask
  */
-inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
+inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) {
   int opcode = lir->opcode;
 
   if (IsPseudoLirOp(opcode)) {
@@ -172,7 +170,7 @@
   lir->flags.size = GetInsnSize(lir);
   estimated_native_code_size_ += lir->flags.size;
   /* Set up the mask for resources that are updated */
-  if (flags & (IS_LOAD | IS_STORE)) {
+  if (!leave_mem_ref && (flags & (IS_LOAD | IS_STORE))) {
     /* Default to heap - will catch specialized classes later */
     SetMemRefType(lir, flags & IS_LOAD, kHeapRef);
   }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 10c2459..9fc93d0 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -318,6 +318,8 @@
   int opt_flags = mir->optimization_flags;
   uint32_t vB = mir->dalvikInsn.vB;
   uint32_t vC = mir->dalvikInsn.vC;
+  DCHECK(CheckCorePoolSanity()) << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " @ 0x"
+                                << std::hex << current_dalvik_offset_;
 
   // Prep Src and Dest locations.
   int next_sreg = 0;
@@ -423,21 +425,11 @@
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantNoClobber(rl_result.reg, vB);
-      StoreValue(rl_dest, rl_result);
-      if (vB == 0) {
-        Workaround7250540(rl_dest, rl_result.reg);
-      }
+      GenConst(rl_dest, vB);
       break;
 
     case Instruction::CONST_HIGH16:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantNoClobber(rl_result.reg, vB << 16);
-      StoreValue(rl_dest, rl_result);
-      if (vB == 0) {
-        Workaround7250540(rl_dest, rl_result.reg);
-      }
+      GenConst(rl_dest, vB << 16);
       break;
 
     case Instruction::CONST_WIDE_16:
@@ -946,6 +938,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
+  DCHECK(CheckCorePoolSanity());
 }  // NOLINT(readability/fn_size)
 
 // Process extended MIR instructions
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3201b60..3584c33 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -339,23 +339,35 @@
       bool IsDead() { return (master_->liveness_ & storage_mask_) == 0; }
       // Liveness of this view matches.  Note: not equivalent to !IsDead().
       bool IsLive() { return (master_->liveness_ & storage_mask_) == storage_mask_; }
-      void MarkLive() { master_->liveness_ |= storage_mask_; }
+      void MarkLive(int s_reg) {
+        // TODO: Anything useful to assert here?
+        s_reg_ = s_reg;
+        master_->liveness_ |= storage_mask_;
+      }
       void MarkDead() {
-        master_->liveness_ &= ~storage_mask_;
-        SetSReg(INVALID_SREG);
+        if (SReg() != INVALID_SREG) {
+          s_reg_ = INVALID_SREG;
+          master_->liveness_ &= ~storage_mask_;
+          ResetDefBody();
+        }
       }
       RegStorage GetReg() { return reg_; }
       void SetReg(RegStorage reg) { reg_ = reg; }
       bool IsTemp() { return is_temp_; }
       void SetIsTemp(bool val) { is_temp_ = val; }
       bool IsWide() { return wide_value_; }
-      void SetIsWide(bool val) { wide_value_ = val; }
+      void SetIsWide(bool val) {
+        wide_value_ = val;
+        if (!val) {
+          // If not wide, reset partner to self.
+          SetPartner(GetReg());
+        }
+      }
       bool IsDirty() { return dirty_; }
       void SetIsDirty(bool val) { dirty_ = val; }
       RegStorage Partner() { return partner_; }
       void SetPartner(RegStorage partner) { partner_ = partner; }
-      int SReg() { return s_reg_; }
-      void SetSReg(int s_reg) { s_reg_ = s_reg; }
+      int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; }
       uint64_t DefUseMask() { return def_use_mask_; }
       void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; }
       RegisterInfo* Master() { return master_; }
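
The new MarkLive(s_reg)/MarkDead/SReg protocol rests on the master/storage-mask scheme: each view owns a slice of its master's liveness word, so an aliased view can be partially live, which is why IsLive() is not simply !IsDead(). A simplified standalone model (names hypothetical):

    #include <cstdint>

    struct MasterReg { uint32_t liveness = 0; };

    struct RegView {
      MasterReg* master;
      uint32_t storage_mask;  // e.g. 0x1 low word, 0x2 high word, 0x3 full double
      bool IsDead() const { return (master->liveness & storage_mask) == 0; }
      bool IsLive() const { return (master->liveness & storage_mask) == storage_mask; }
      void MarkLive() { master->liveness |= storage_mask; }
      void MarkDead() { master->liveness &= ~storage_mask; }
    };
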
@@ -399,10 +411,15 @@
 
     class RegisterPool {
      public:
-      RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, const std::vector<RegStorage>& core_regs,
-                   const std::vector<RegStorage>& sp_regs, const std::vector<RegStorage>& dp_regs,
+      RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
+                   const std::vector<RegStorage>& core_regs,
+                   const std::vector<RegStorage>& core64_regs,
+                   const std::vector<RegStorage>& sp_regs,
+                   const std::vector<RegStorage>& dp_regs,
                    const std::vector<RegStorage>& reserved_regs,
+                   const std::vector<RegStorage>& reserved64_regs,
                    const std::vector<RegStorage>& core_temps,
+                   const std::vector<RegStorage>& core64_temps,
                    const std::vector<RegStorage>& sp_temps,
                    const std::vector<RegStorage>& dp_temps);
       ~RegisterPool() {}
@@ -416,6 +433,8 @@
       }
       GrowableArray<RegisterInfo*> core_regs_;
       int next_core_reg_;
+      GrowableArray<RegisterInfo*> core64_regs_;
+      int next_core64_reg_;
       GrowableArray<RegisterInfo*> sp_regs_;    // Single precision float.
       int next_sp_reg_;
       GrowableArray<RegisterInfo*> dp_regs_;    // Double precision float.
@@ -473,6 +492,7 @@
       LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
                   LIR* cont = nullptr) :
         m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+          m2l->StartSlowPath(cont);
       }
       virtual ~LIRSlowPath() {}
       virtual void Compile() = 0;
@@ -481,6 +501,14 @@
         return arena->Alloc(size, kArenaAllocData);
       }
 
+      LIR* GetContinuationLabel() {
+        return cont_;
+      }
+
+      LIR* GetFromFast() {
+        return fromfast_;
+      }
+
      protected:
       LIR* GenerateTargetLabel(int opcode = kPseudoTargetLabel);
 
@@ -571,7 +599,7 @@
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
     void MarkSafepointPC(LIR* inst);
-    void SetupResourceMasks(LIR* lir);
+    void SetupResourceMasks(LIR* lir, bool leave_mem_ref = false);
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
     void SetupRegMask(uint64_t* mask, int reg);
@@ -589,6 +617,7 @@
     LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2);
     LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta);
     LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi);
+    LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method);
     LIR* AddWordData(LIR* *constant_list_p, int value);
     LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
     void ProcessSwitchTables();
@@ -613,6 +642,12 @@
     LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
+
+    virtual void StartSlowPath(LIR* label) {}
+    virtual void BeginInvoke(CallInfo* info) {}
+    virtual void EndInvoke(CallInfo* info) {}
+
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
     RegLocation NarrowRegLoc(RegLocation loc);
 
@@ -620,7 +655,7 @@
     void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
     void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir);
     void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir);
-    void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
+    virtual void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
 
     // Shared by all targets - implemented in ralloc_util.cc
     int GetSRegHi(int lowSreg);
@@ -644,17 +679,21 @@
     RegStorage AllocPreservedSingle(int s_reg);
     virtual RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required);
-    RegStorage AllocFreeTemp();
-    RegStorage AllocTemp();
-    RegStorage AllocTempSingle();
-    RegStorage AllocTempDouble();
+    virtual RegStorage AllocFreeTemp();
+    virtual RegStorage AllocTemp();
+    virtual RegStorage AllocTempWide();
+    virtual RegStorage AllocTempSingle();
+    virtual RegStorage AllocTempDouble();
+    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
+    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
     void FlushReg(RegStorage reg);
     void FlushRegWide(RegStorage reg);
     RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide);
     RegStorage FindLiveReg(GrowableArray<RegisterInfo*> &regs, int s_reg);
-    void FreeTemp(RegStorage reg);
-    bool IsLive(RegStorage reg);
-    bool IsTemp(RegStorage reg);
+    virtual void FreeTemp(RegStorage reg);
+    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
+    virtual bool IsLive(RegStorage reg);
+    virtual bool IsTemp(RegStorage reg);
     bool IsPromoted(RegStorage reg);
     bool IsDirty(RegStorage reg);
     void LockTemp(RegStorage reg);
@@ -662,7 +701,7 @@
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    RegLocation WideToNarrow(RegLocation rl);
+    virtual RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
     void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
@@ -671,16 +710,16 @@
     void FlushAllRegs();
     bool RegClassMatches(int reg_class, RegStorage reg);
     void MarkLive(RegLocation loc);
-    void MarkLiveReg(RegStorage reg, int s_reg);
     void MarkTemp(RegStorage reg);
     void UnmarkTemp(RegStorage reg);
     void MarkWide(RegStorage reg);
+    void MarkNarrow(RegStorage reg);
     void MarkClean(RegLocation loc);
     void MarkDirty(RegLocation loc);
     void MarkInUse(RegStorage reg);
     bool CheckCorePoolSanity();
-    RegLocation UpdateLoc(RegLocation loc);
-    RegLocation UpdateLocWide(RegLocation loc);
+    virtual RegLocation UpdateLoc(RegLocation loc);
+    virtual RegLocation UpdateLocWide(RegLocation loc);
     RegLocation UpdateRawLoc(RegLocation loc);
 
     /**
@@ -691,7 +730,7 @@
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register pairs.
      */
-    RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+    virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
 
     /**
      * @brief Used to prepare a register location to receive a value.
@@ -700,7 +739,7 @@
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register.
      */
-    RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+    virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
 
     void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
@@ -716,7 +755,7 @@
     bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                           RegLocation rl_src, RegLocation rl_dest, int lit);
     bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
-    void HandleSlowPaths();
+    virtual void HandleSlowPaths();
     void GenBarrier();
     void GenDivZeroException();
     // c_code holds condition code that's generated from testing divisor against 0.
@@ -770,8 +809,8 @@
     template <size_t pointer_size>
     void GenConversionCall(ThreadOffset<pointer_size> func_offset, RegLocation rl_dest,
                            RegLocation rl_src);
-    void GenSuspendTest(int opt_flags);
-    void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+    virtual void GenSuspendTest(int opt_flags);
+    virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target);
 
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
@@ -901,41 +940,41 @@
     // Shared by all targets - implemented in gen_loadstore.cc.
     RegLocation LoadCurrMethod();
     void LoadCurrMethodDirect(RegStorage r_tgt);
-    LIR* LoadConstant(RegStorage r_dest, int value);
+    virtual LIR* LoadConstant(RegStorage r_dest, int value);
     // Natural word size.
-    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+    virtual LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
       return LoadBaseDisp(r_base, displacement, r_dest, kWord);
     }
     // Load 32 bits, regardless of target.
-    LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
+    virtual LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
       return LoadBaseDisp(r_base, displacement, r_dest, k32);
     }
     // Load a reference at base + displacement and decompress into register.
-    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+    virtual LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
       return LoadBaseDisp(r_base, displacement, r_dest, kReference);
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 64-bit memory storage.
-    RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
+    virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 64-bit memory storage.
-    void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 64-bit memory storage.
-    void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
     // Store an item of natural word size.
-    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, kWord);
     }
     // Store an uncompressed reference into a compressed 32-bit container.
-    LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, kReference);
     }
     // Store 32 bits, regardless of target.
-    LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, k32);
     }
 
@@ -944,7 +983,7 @@
      * @param rl_dest The destination dalvik register location.
      * @param rl_src The source register location. Can be either physical register or dalvik register.
      */
-    void StoreValue(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreValue(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
@@ -953,7 +992,7 @@
      * @param rl_src The source register location. Can be either physical register or dalvik
      *  register.
      */
-    void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store to a destination as per bytecode semantics.
@@ -965,7 +1004,7 @@
      * register value that now needs to be properly registered.  This is used to avoid an
      * extra register copy that would result if StoreValue was called.
      */
-    void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
@@ -977,14 +1016,14 @@
      * register values that now need to be properly registered.  This is used to avoid an
      * extra pair of register copies that would result if StoreValueWide was called.
      */
-    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
-    void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+    virtual void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
     bool MethodBlockCodeGen(BasicBlock* bb);
     bool SpecialMIR2LIR(const InlineMethod& special);
-    void MethodMIR2LIR();
+    virtual void MethodMIR2LIR();
     // Update LIR for verbose listings.
     void UpdateLIROffsets();
 
@@ -1060,8 +1099,6 @@
     virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
 
     // Required for target - register utilities.
-    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class) = 0;
-    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class) = 0;
     virtual RegStorage TargetReg(SpecialTargetRegister reg) = 0;
     virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0;
     virtual RegLocation GetReturnAlt() = 0;
@@ -1074,7 +1111,6 @@
     virtual void AdjustSpillMask() = 0;
     virtual void ClobberCallerSave() = 0;
     virtual void FreeCallTemps() = 0;
-    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) = 0;
     virtual void LockCallTemps() = 0;
     virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0;
     virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0;
@@ -1333,14 +1369,14 @@
      * @param loc location of result
      * @returns update location
      */
-    RegLocation ForceTemp(RegLocation loc);
+    virtual RegLocation ForceTemp(RegLocation loc);
 
     /*
      * @brief Force a wide location (in registers) into temporary registers
      * @param loc location of result
      * @returns update location
      */
-    RegLocation ForceTempWide(RegLocation loc);
+    virtual RegLocation ForceTempWide(RegLocation loc);
 
     static constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
       return wide ? k64 : ref ? kReference : k32;
@@ -1386,7 +1422,7 @@
      */
     virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
-  private:
+  protected:
     void ClobberBody(RegisterInfo* p);
     void SetCurrentDexPc(DexOffset dexpc) {
       current_dalvik_offset_ = dexpc;
@@ -1445,7 +1481,14 @@
 
     // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using
     // kArg2 as temp.
-    void CopyToArgumentRegs(RegStorage arg0, RegStorage arg1);
+    virtual void CopyToArgumentRegs(RegStorage arg0, RegStorage arg1);
+
+    /**
+     * @brief Load Constant into RegLocation
+     * @param rl_dest Destination RegLocation
+     * @param value Constant value
+     */
+    virtual void GenConst(RegLocation rl_dest, int value);
 
   public:
     // TODO: add accessors for these.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index bcc077b..2c51c1f 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -57,14 +57,19 @@
 
 Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
                                     const std::vector<RegStorage>& core_regs,
+                                    const std::vector<RegStorage>& core64_regs,
                                     const std::vector<RegStorage>& sp_regs,
                                     const std::vector<RegStorage>& dp_regs,
                                     const std::vector<RegStorage>& reserved_regs,
+                                    const std::vector<RegStorage>& reserved64_regs,
                                     const std::vector<RegStorage>& core_temps,
+                                    const std::vector<RegStorage>& core64_temps,
                                     const std::vector<RegStorage>& sp_temps,
                                     const std::vector<RegStorage>& dp_temps) :
-    core_regs_(arena, core_regs.size()), next_core_reg_(0), sp_regs_(arena, sp_regs.size()),
-    next_sp_reg_(0), dp_regs_(arena, dp_regs.size()), next_dp_reg_(0), m2l_(m2l)  {
+    core_regs_(arena, core_regs.size()), next_core_reg_(0),
+    core64_regs_(arena, core64_regs.size()), next_core64_reg_(0),
+    sp_regs_(arena, sp_regs.size()), next_sp_reg_(0),
+    dp_regs_(arena, dp_regs.size()), next_dp_reg_(0), m2l_(m2l)  {
   // Initialize the fast lookup map.
   m2l_->reginfo_map_.Reset();
   if (kIsDebugBuild) {
@@ -82,6 +87,11 @@
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
     core_regs_.Insert(info);
   }
+  for (RegStorage reg : core64_regs) {
+    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
+    m2l_->reginfo_map_.Put(reg.GetReg(), info);
+    core64_regs_.Insert(info);
+  }
   for (RegStorage reg : sp_regs) {
     RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
     m2l_->reginfo_map_.Put(reg.GetReg(), info);
@@ -97,11 +107,17 @@
   for (RegStorage reg : reserved_regs) {
     m2l_->MarkInUse(reg);
   }
+  for (RegStorage reg : reserved64_regs) {
+    m2l_->MarkInUse(reg);
+  }
 
   // Mark temp regs - all others not in use can be used for promotion
   for (RegStorage reg : core_temps) {
     m2l_->MarkTemp(reg);
   }
+  for (RegStorage reg : core64_temps) {
+    m2l_->MarkTemp(reg);
+  }
   for (RegStorage reg : sp_temps) {
     m2l_->MarkTemp(reg);
   }
@@ -152,6 +168,9 @@
   } else {
     RegisterInfo* info = GetRegInfo(reg);
     if (info->IsTemp() && !info->IsDead()) {
+      if (info->GetReg() != info->Partner()) {
+        ClobberBody(GetRegInfo(info->Partner()));
+      }
       ClobberBody(info);
       if (info->IsAliased()) {
         ClobberAliases(info);
@@ -169,19 +188,7 @@
   for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
        alias = alias->GetAliasChain()) {
    DCHECK(!alias->IsAliased());  // Only the master should be marked as aliased.
-    if (alias->SReg() != INVALID_SREG) {
-      alias->SetSReg(INVALID_SREG);
-      alias->ResetDefBody();
-      if (alias->IsWide()) {
-        alias->SetIsWide(false);
-        if (alias->GetReg() != alias->Partner()) {
-          RegisterInfo* p = GetRegInfo(alias->Partner());
-          p->SetIsWide(false);
-          p->MarkDead();
-          p->ResetDefBody();
-        }
-      }
-    }
+    ClobberBody(alias);
   }
 }
 
@@ -204,6 +211,11 @@
     GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
     for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
       if (info->SReg() == s_reg) {
+        if (info->GetReg() != info->Partner()) {
+          // Dealing with a pair - clobber the other half.
+          DCHECK(!info->IsAliased());
+          ClobberBody(GetRegInfo(info->Partner()));
+        }
         ClobberBody(info);
         if (info->IsAliased()) {
           ClobberAliases(info);
@@ -325,7 +337,7 @@
       next = 0;
     RegisterInfo* info = regs.Get(next);
     // Try to allocate a register that doesn't hold a live value.
-    if (info->IsTemp() && !info->InUse() && !info->IsLive()) {
+    if (info->IsTemp() && !info->InUse() && info->IsDead()) {
       Clobber(info->GetReg());
       info->MarkInUse();
       /*
@@ -349,7 +361,13 @@
       ClobberSReg(info->SReg());
       Clobber(info->GetReg());
       info->MarkInUse();
-      info->SetIsWide(false);
+      if (info->IsWide()) {
+        RegisterInfo* partner = GetRegInfo(info->Partner());
+        DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
+        DCHECK(partner->IsWide());
+        info->SetIsWide(false);
+        partner->SetIsWide(false);
+      }
       *next_temp = next + 1;
       return info->GetReg();
     }
@@ -372,6 +390,18 @@
   return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, true);
 }
 
+RegStorage Mir2Lir::AllocTempWide() {
+  RegStorage res;
+  if (reg_pool_->core64_regs_.Size() != 0) {
+    res = AllocTempBody(reg_pool_->core64_regs_, &reg_pool_->next_core64_reg_, true);
+  } else {
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    res = RegStorage::MakeRegPair(low_reg, high_reg);
+  }
+  return res;
+}
+
 RegStorage Mir2Lir::AllocTempSingle() {
   RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, true);
   DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
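
AllocTempWide above encodes the target split: a non-empty core64 pool (arm64) yields one solo 64-bit temp, while 32-bit targets fall back to pairing two 32-bit temps. The policy in isolation, with hypothetical stand-in types:

    struct Reg { int id; };
    struct WideReg { Reg low; Reg high; bool is_pair; };

    WideReg AllocWide(bool has_core64_pool, Reg (*alloc64)(), Reg (*alloc32)()) {
      if (has_core64_pool) {
        Reg r = alloc64();
        return { r, r, false };  // one solo 64-bit register
      }
      Reg lo = alloc32();
      Reg hi = alloc32();
      return { lo, hi, true };   // low/high pair of 32-bit registers
    }
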
@@ -384,6 +414,20 @@
   return res;
 }
 
+RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempDouble();
+  }
+  return AllocTempWide();
+}
+
+RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempSingle();
+  }
+  return AllocTemp();
+}
+
 RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*> &regs, int s_reg) {
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&regs);
@@ -463,6 +507,20 @@
   }
 }
 
+void Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
+  DCHECK(rl_keep.wide);
+  DCHECK(rl_free.wide);
+  int free_low = rl_free.reg.GetLowReg();
+  int free_high = rl_free.reg.GetHighReg();
+  int keep_low = rl_keep.reg.GetLowReg();
+  int keep_high = rl_keep.reg.GetHighReg();
+  if ((free_low != keep_low) && (free_low != keep_high) &&
+      (free_high != keep_low) && (free_high != keep_high)) {
+    // No overlap, free both.
+    FreeTemp(rl_free.reg);
+  }
+}
+
 bool Mir2Lir::IsLive(RegStorage reg) {
   bool res;
   if (reg.IsPair()) {
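
The default FreeRegLocTemps only frees when the two wide locations are fully disjoint, i.e. none of the four half-registers coincide. The test in isolation (hypothetical helper; FreeRegLocTemps frees rl_free.reg exactly when this returns false):

    bool PairsOverlap(int free_lo, int free_hi, int keep_lo, int keep_hi) {
      return free_lo == keep_lo || free_lo == keep_hi ||
             free_hi == keep_lo || free_hi == keep_hi;
    }
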
@@ -725,8 +783,6 @@
       FlushSpecificReg(info);
     }
     info->MarkDead();
-    info->SetSReg(INVALID_SREG);
-    info->ResetDefBody();
     info->SetIsWide(false);
   }
 }
@@ -742,35 +798,48 @@
   }
 }
 
-void Mir2Lir::MarkLiveReg(RegStorage reg, int s_reg) {
-  RegisterInfo* info = GetRegInfo(reg);
-  if ((info->SReg() == s_reg) && info->IsLive()) {
-    return;  // Already live.
-  }
-  if (s_reg != INVALID_SREG) {
-    ClobberSReg(s_reg);
-    if (info->IsTemp()) {
-      info->MarkLive();
-    }
-  } else {
-    // Can't be live if no associated s_reg.
-    DCHECK(info->IsTemp());
-    info->MarkDead();
-  }
-  info->SetSReg(s_reg);
-}
-
 void Mir2Lir::MarkLive(RegLocation loc) {
   RegStorage reg = loc.reg;
+  if (!IsTemp(reg)) {
+    return;
+  }
   int s_reg = loc.s_reg_low;
-  if (reg.IsPair()) {
-    MarkLiveReg(reg.GetLow(), s_reg);
-    MarkLiveReg(reg.GetHigh(), s_reg+1);
-  } else {
-    if (loc.wide) {
-      ClobberSReg(s_reg + 1);
+  if (s_reg == INVALID_SREG) {
+    // Can't be live if no associated sreg.
+    if (reg.IsPair()) {
+      GetRegInfo(reg.GetLow())->MarkDead();
+      GetRegInfo(reg.GetHigh())->MarkDead();
+    } else {
+      GetRegInfo(reg)->MarkDead();
     }
-    MarkLiveReg(reg, s_reg);
+  } else {
+    if (reg.IsPair()) {
+      RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
+      RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+      if (info_lo->IsLive() && (info_lo->SReg() == s_reg) && info_hi->IsLive() &&
+          (info_hi->SReg() == s_reg)) {
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      ClobberSReg(s_reg + 1);
+      info_lo->MarkLive(s_reg);
+      info_hi->MarkLive(s_reg + 1);
+    } else {
+      RegisterInfo* info = GetRegInfo(reg);
+      if (info->IsLive() && (info->SReg() == s_reg)) {
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      if (loc.wide) {
+        ClobberSReg(s_reg + 1);
+      }
+      info->MarkLive(s_reg);
+    }
+    if (loc.wide) {
+      MarkWide(reg);
+    } else {
+      MarkNarrow(reg);
+    }
   }
 }
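
The post-condition of the rewritten pair path is that both halves end up live with consecutive sregs (s_reg and s_reg + 1) before MarkWide pairs them up. A standalone restatement of that invariant (Info is a stand-in type; note that the early-out in the diff compares the high half against s_reg rather than s_reg + 1, which only makes it more conservative):

  struct Info { bool live = false; int sreg = -1; };
  void MarkPairLive(Info& lo, Info& hi, int s_reg) {
    if (lo.live && lo.sreg == s_reg && hi.live && hi.sreg == s_reg + 1) {
      return;  // already live in this configuration
    }
    // (ClobberSReg of any stale holders elided.)
    lo.live = true; lo.sreg = s_reg;
    hi.live = true; hi.sreg = s_reg + 1;
  }
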
 
@@ -792,6 +861,13 @@
   if (reg.IsPair()) {
     RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+    // Unpair any old partners.
+    if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) {
+      GetRegInfo(info_lo->Partner())->SetIsWide(false);
+    }
+    if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) {
+      GetRegInfo(info_hi->Partner())->SetIsWide(false);
+    }
     info_lo->SetIsWide(true);
     info_hi->SetIsWide(true);
     info_lo->SetPartner(reg.GetHigh());
@@ -803,6 +879,13 @@
   }
 }
 
+void Mir2Lir::MarkNarrow(RegStorage reg) {
+  DCHECK(!reg.IsPair());
+  RegisterInfo* info = GetRegInfo(reg);
+  info->SetIsWide(false);
+  info->SetPartner(reg);
+}
+
 void Mir2Lir::MarkClean(RegLocation loc) {
   if (loc.reg.IsPair()) {
     RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
@@ -842,16 +925,17 @@
 }
 
 bool Mir2Lir::CheckCorePoolSanity() {
-  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
+  GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    RegStorage my_reg = info->GetReg();
-    if (info->IsWide() && my_reg.IsPair()) {
+    if (info->IsTemp() && info->IsLive() && info->IsWide()) {
+      RegStorage my_reg = info->GetReg();
       int my_sreg = info->SReg();
       RegStorage partner_reg = info->Partner();
       RegisterInfo* partner = GetRegInfo(partner_reg);
       DCHECK(partner != NULL);
       DCHECK(partner->IsWide());
       DCHECK_EQ(my_reg.GetReg(), partner->Partner().GetReg());
+      DCHECK(partner->IsLive());
       int partner_sreg = partner->SReg();
       if (my_sreg == INVALID_SREG) {
         DCHECK_EQ(partner_sreg, INVALID_SREG);
@@ -859,13 +943,41 @@
         int diff = my_sreg - partner_sreg;
         DCHECK((diff == 0) || (diff == -1) || (diff == 1));
       }
-    } else {
-      // TODO: add whatever sanity checks might be useful for 64BitSolo regs here.
-      // TODO: sanity checks for floating point pools?
     }
-    if (!info->IsLive()) {
-      DCHECK(info->DefStart() == NULL);
-      DCHECK(info->DefEnd() == NULL);
+    if (info->Master() != info) {
+      // Aliased.
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // If I'm live, master should not be live, but should show liveness in alias set.
+        DCHECK_EQ(info->Master()->SReg(), INVALID_SREG);
+        DCHECK(!info->Master()->IsDead());
+      } else if (!info->IsDead()) {
+        // If I'm not live but there is liveness in the set, the master must be live.
+        DCHECK_EQ(info->SReg(), INVALID_SREG);
+        DCHECK(info->Master()->IsLive());
+      }
+    }
+    if (info->IsAliased()) {
+      // Has child aliases.
+      DCHECK_EQ(info->Master(), info);
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // Master live, no child should be dead - all should show liveness in set.
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          DCHECK(!p->IsDead());
+          DCHECK_EQ(p->SReg(), INVALID_SREG);
+        }
+      } else if (!info->IsDead()) {
+        // Master not live, one or more aliases must be.
+        bool live_alias = false;
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          live_alias |= p->IsLive();
+        }
+        DCHECK(live_alias);
+      }
+    }
+    if (info->IsLive() && (info->SReg() == INVALID_SREG)) {
+      // If not fully live, should have INVALID_SREG and defs should be null.
+      DCHECK(info->DefStart() == nullptr);
+      DCHECK(info->DefEnd() == nullptr);
     }
   }
   return true;
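
The alias checks added above enforce that liveness sits at exactly one level of an alias set: a fully live child forces its master into a bookkeeping state (live flag set, INVALID_SREG), and a child in that bookkeeping state implies the real value is held elsewhere in the set. A loose three-state abstraction of those DCHECKs (hypothetical, not ART's RegisterInfo API):

  enum State { kDead, kSetLive, kLive };  // kSetLive: live flag set, INVALID_SREG
  bool AliasPairSane(State child, State master) {
    if (child == kLive) return master == kSetLive;  // master only aggregates
    if (child == kSetLive) return master != kDead;  // value on master or a sibling
    return true;                                    // a dead child constrains nothing
  }
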
@@ -956,11 +1068,12 @@
     if (!RegClassMatches(reg_class, loc.reg)) {
       // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_regs)->SetSReg(GetRegInfo(loc.reg)->SReg());
+      // Clobber the old regs.
       Clobber(loc.reg);
+      // ...and mark the new ones live.
       loc.reg = new_regs;
       MarkWide(loc.reg);
+      MarkLive(loc);
     }
     return loc;
   }
@@ -989,10 +1102,11 @@
     if (!RegClassMatches(reg_class, loc.reg)) {
       // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_reg)->SetSReg(GetRegInfo(loc.reg)->SReg());
+      // Clobber the old reg.
       Clobber(loc.reg);
+      // ...and mark the new one live.
       loc.reg = new_reg;
+      MarkLive(loc);
     }
     return loc;
   }
@@ -1220,19 +1334,9 @@
   RegLocation gpr_res = LocCReturnWide();
   RegLocation fpr_res = LocCReturnDouble();
   RegLocation res = is_double ? fpr_res : gpr_res;
-  if (res.reg.IsPair()) {
-    Clobber(res.reg);
-    LockTemp(res.reg);
-    // Does this wide value live in two registers or one vector register?
-    if (res.reg.GetLowReg() != res.reg.GetHighReg()) {
-      // FIXME: I think we want to mark these as wide as well.
-      MarkWide(res.reg);
-    }
-  } else {
-    Clobber(res.reg);
-    LockTemp(res.reg);
-    MarkWide(res.reg);
-  }
+  Clobber(res.reg);
+  LockTemp(res.reg);
+  MarkWide(res.reg);
   return res;
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index cc0e1f2..3070edd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -22,7 +22,7 @@
 
 namespace art {
 
-class X86Mir2Lir FINAL : public Mir2Lir {
+class X86Mir2Lir : public Mir2Lir {
   public:
     X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
@@ -54,8 +54,6 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -68,7 +66,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
@@ -176,8 +173,8 @@
       * @param op The DEX opcode for the operation.
       * @param is_commutative The sources can be swapped if needed.
       */
-    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                      Instruction::Code op, bool is_commutative);
+    virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                              Instruction::Code op, bool is_commutative);
 
     /**
       * @brief Generate a two operand long arithmetic operation.
@@ -193,7 +190,7 @@
       * @param rl_src The other operand.  May be in a register or in memory.
       * @param op The DEX opcode for the operation.
       */
-    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
     /**
      * @brief Implement instanceof a final class with x86 specific code.
@@ -266,6 +263,12 @@
     bool InexpensiveConstantDouble(int64_t value);
 
     /*
+     * @brief Should we try to optimize for two-address instructions?
+     * @return true if we try to avoid generating three-operand instructions.
+     */
+    virtual bool GenerateTwoOperandInstructions() const { return true; }
+
+    /*
      * @brief x86 specific codegen for int operations.
      * @param opcode Operation to perform.
      * @param rl_dest Destination for the result.
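
The hook defaults to true because classic x86 encodes arithmetic most naturally in two-address forms (the destination doubles as a source). A subclass, say a 64-bit specialization that this diff does not include, could flip the policy; illustrative shape only:

  struct CodegenPolicy {
    virtual ~CodegenPolicy() {}
    virtual bool GenerateTwoOperandInstructions() const { return true; }
  };
  struct ThreeOperandPolicy : CodegenPolicy {  // hypothetical override
    bool GenerateTwoOperandInstructions() const override { return false; }
  };
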
@@ -305,7 +308,7 @@
      * @param type How the method will be invoked.
      * @returns Call instruction
      */
-    LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+    virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
 
     /*
      * @brief Handle x86 specific literals
@@ -324,7 +327,7 @@
      */
     std::vector<uint8_t>* ReturnCallFrameInformation();
 
-  private:
+  protected:
     size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib);
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -399,6 +402,12 @@
     static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
 
     /*
+     * @brief Ensure that a temporary register is byte addressable.
+     * @returns a temporary guaranteed to be byte addressable.
+     */
+    virtual RegStorage AllocateByteRegister();
+
+    /*
     * @brief Generate inline code for the fast case of String.indexOf.
      * @param info Call parameters
      * @param zero_based 'true' if the index into the string is 0.
@@ -534,7 +543,7 @@
      * @param rl_src The source of the long.
      * @param is_double 'true' if dealing with double, 'false' for float.
      */
-    void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+    virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
 
     /*
      * @brief Perform MIR analysis before compiling method.
@@ -580,7 +589,7 @@
      * @param bb Basic block containing instruction.
      * @param mir Instruction to analyze.
      */
-    void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+    virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
 
     /*
      * @brief Analyze one MIR float/double instruction
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index fbb1785..a6ccc99 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1292,8 +1292,12 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src2, op);
-      return;
+      if (GenerateTwoOperandInstructions()) {
+        GenLongArith(rl_dest, rl_src2, op);
+        return;
+      }
+      break;
+
     default:
       break;
   }
@@ -1532,7 +1536,7 @@
 
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                           RegLocation rl_src, int shift_amount) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   switch (opcode) {
     case Instruction::SHL_LONG:
     case Instruction::SHL_LONG_2ADDR:
@@ -1542,7 +1546,6 @@
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else if (shift_amount > 31) {
         OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        FreeTemp(rl_src.reg.GetHigh());
         NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else {
@@ -1642,7 +1645,11 @@
     case Instruction::XOR_LONG_2ADDR:
     case Instruction::AND_LONG_2ADDR:
       if (rl_src2.is_const) {
-        GenLongImm(rl_dest, rl_src2, opcode);
+        if (GenerateTwoOperandInstructions()) {
+          GenLongImm(rl_dest, rl_src2, opcode);
+        } else {
+          GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        }
       } else {
         DCHECK(rl_src1.is_const);
         GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
@@ -1870,7 +1877,7 @@
 
   // SETcc only works with EAX..EDX.
   if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
-    result_reg = AllocTypedTemp(false, kCoreReg);
+    result_reg = AllocateByteRegister();
     DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
 
@@ -2111,12 +2118,16 @@
       LOG(FATAL) << "Invalid word arith op: " << opcode;
   }
 
-    // Can we convert to a two address instruction?
+  // Can we convert to a two-address instruction?
   if (!is_two_addr &&
         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
-      is_two_addr = true;
-    }
+    is_two_addr = true;
+  }
+
+  if (!GenerateTwoOperandInstructions()) {
+    is_two_addr = false;
+  }
 
   // Get the div/rem stuff out of the way.
   if (is_div_rem) {
@@ -2213,6 +2224,8 @@
             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
               rl_lhs = LoadValue(rl_lhs, kCoreReg);
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              // No-op if these are the same.
+              OpRegCopy(rl_result.reg, rl_lhs.reg);
             } else {
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
               LoadValueDirect(rl_lhs, rl_result.reg);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 237c68c..e7a629a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -81,6 +81,7 @@
 #endif
 };
 
+static const std::vector<RegStorage> empty_pool;
 static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
     core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
 static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
@@ -423,6 +424,10 @@
   UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
 }
 
+RegStorage X86Mir2Lir::AllocateByteRegister() {
+  return AllocTypedTemp(false, kCoreReg);
+}
+
 /* Clobber all regs that might be used by an external C call */
 void X86Mir2Lir::ClobberCallerSave() {
   Clobber(rs_rAX);
@@ -524,30 +529,15 @@
 #endif
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  }
-  RegStorage low_reg = AllocTemp();
-  RegStorage high_reg = AllocTemp();
-  return RegStorage::MakeRegPair(low_reg, high_reg);
-}
-
-RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempSingle();
-  }
-  return AllocTemp();
-}
-
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
   if (Gen64Bit()) {
-    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64, reserved_regs_64,
-                                        core_temps_64, sp_temps_64, dp_temps_64);
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, empty_pool, core_regs_64, sp_regs_64,
+                                          dp_regs_64, empty_pool, reserved_regs_64,
+                                          empty_pool, core_temps_64, sp_temps_64, dp_temps_64);
   } else {
-    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32, reserved_regs_32,
-                                        core_temps_32, sp_temps_32, dp_temps_32);
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
+                                          dp_regs_32, reserved_regs_32, empty_pool,
+                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
   }
 
   // Target-specific adjustments.
@@ -572,20 +562,6 @@
   reg_pool_->next_dp_reg_ = 1;
 }
 
-void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  int free_low = rl_free.reg.GetLowReg();
-  int free_high = rl_free.reg.GetHighReg();
-  int keep_low = rl_keep.reg.GetLowReg();
-  int keep_high = rl_keep.reg.GetHighReg();
-  if ((free_low != keep_low) && (free_low != keep_high) &&
-      (free_high != keep_low) && (free_high != keep_high)) {
-    // No overlap, free both
-    FreeTemp(rl_free.reg);
-  }
-}
-
 void X86Mir2Lir::SpillCoreRegs() {
   if (num_core_spills_ == 0) {
     return;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index e9592a6..fed31c1 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -161,6 +161,22 @@
       case kOpMul:
         opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
         return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
+      case kOp2Byte:
+        opcode = kX86Mov32RI;
+        value = static_cast<int8_t>(value);
+        break;
+      case kOp2Short:
+        opcode = kX86Mov32RI;
+        value = static_cast<int16_t>(value);
+        break;
+      case kOp2Char:
+        opcode = kX86Mov32RI;
+        value = static_cast<uint16_t>(value);
+        break;
+      case kOpNeg:
+        opcode = kX86Mov32RI;
+        value = -value;
+        break;
       default:
         LOG(FATAL) << "Bad case in OpRegImm " << op;
     }
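
The new cases fold unary ops applied to an immediate into a plain 32-bit mov of the transformed constant. The casts behave as follows (standalone restatement, not ART code):

  #include <cstdint>
  int32_t Fold2Byte(int32_t v)  { return static_cast<int8_t>(v); }    // sign-extend bit 7
  int32_t Fold2Short(int32_t v) { return static_cast<int16_t>(v); }   // sign-extend bit 15
  int32_t Fold2Char(int32_t v)  { return static_cast<uint16_t>(v); }  // zero-extend to 16 bits
  int32_t FoldNeg(int32_t v)    { return -v; }
  // Fold2Byte(0x1FF) == -1, Fold2Char(0x12345) == 0x2345, FoldNeg(7) == -7.
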
@@ -523,7 +539,7 @@
     int32_t val_hi = High32Bits(value);
     int32_t low_reg_val = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
     LIR *res;
-    bool is_fp = RegStorage::IsFloat(low_reg_val);
+    bool is_fp = r_dest.IsFloat();
     // TODO: clean this up once we fully recognize 64-bit storage containers.
     if (is_fp) {
       if (value == 0) {
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 5f89c21..865311b 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -182,7 +182,7 @@
     dom_post_order_traversal_->Reset();
   }
   ClearAllVisitedFlags();
-  std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*> > work_stack;
+  std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*>> work_stack;
   bb->visited = true;
   work_stack.push_back(std::make_pair(bb, bb->i_dominated->GetIterator()));
   while (!work_stack.empty()) {
@@ -557,8 +557,7 @@
       if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
         continue;
       }
-      MIR *phi =
-          static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocDFInfo));
+      MIR *phi = NewMIR();
       phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
       phi->dalvikInsn.vA = dalvik_reg;
       phi->offset = phi_bb->start_offset;
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index e19f3cf..01c8f80 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -17,6 +17,7 @@
 #include "verified_method.h"
 
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -34,7 +35,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
-#include "UniquePtrCompat.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
@@ -45,7 +45,7 @@
 
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
-  UniquePtr<VerifiedMethod> verified_method(new VerifiedMethod);
+  std::unique_ptr<VerifiedMethod> verified_method(new VerifiedMethod);
   if (compile) {
     /* Generate a register map. */
     if (!verified_method->GenerateGcMap(method_verifier)) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 93feb29..0f41d2b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -502,7 +502,7 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
   if (dump_stats_) {
@@ -568,7 +568,7 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
+  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
@@ -626,7 +626,7 @@
 }
 
 static void ResolveExceptionsForMethod(MethodHelper* mh,
-    std::set<std::pair<uint16_t, const DexFile*> >& exceptions_to_resolve)
+    std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const DexFile::CodeItem* code_item = mh->GetCodeItem();
   if (code_item == NULL) {
@@ -665,8 +665,8 @@
 
 static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  std::set<std::pair<uint16_t, const DexFile*> >* exceptions_to_resolve =
-      reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*> >*>(arg);
+  std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve =
+      reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
   MethodHelper mh;
   for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
     mirror::ArtMethod* m = c->GetVirtualMethod(i);
@@ -720,7 +720,7 @@
   // Resolve exception classes referenced by the loaded classes. The catch logic assumes
   // exceptions are resolved by the verifier when there is a catch block in an interested method.
   // Do this here so that exception classes appear to have been specified image classes.
-  std::set<std::pair<uint16_t, const DexFile*> > unresolved_exception_types;
+  std::set<std::pair<uint16_t, const DexFile*>> unresolved_exception_types;
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> java_lang_Throwable(
       hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;")));
@@ -1152,28 +1152,22 @@
       *type = sharp_type;
     }
   } else {
-    if (compiling_boot) {
+    bool method_in_image = compiling_boot ||
+        Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace();
+    if (method_in_image) {
+      CHECK(!method->IsAbstract());
       *type = sharp_type;
-      *direct_method = -1;
-      *direct_code = -1;
+      *direct_method = compiling_boot ? -1 : reinterpret_cast<uintptr_t>(method);
+      *direct_code = compiling_boot ? -1 : compiler_->GetEntryPointOf(method);
+      target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+      target_method->dex_method_index = method->GetDexMethodIndex();
+    } else if (!must_use_direct_pointers) {
+      // Set the code and rely on the dex cache for the method.
+      *type = sharp_type;
+      *direct_code = compiler_->GetEntryPointOf(method);
     } else {
-      bool method_in_image =
-          Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace();
-      if (method_in_image) {
-        CHECK(!method->IsAbstract());
-        *type = sharp_type;
-        *direct_method = reinterpret_cast<uintptr_t>(method);
-        *direct_code = compiler_->GetEntryPointOf(method);
-        target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-        target_method->dex_method_index = method->GetDexMethodIndex();
-      } else if (!must_use_direct_pointers) {
-        // Set the code and rely on the dex cache for the method.
-        *type = sharp_type;
-        *direct_code = compiler_->GetEntryPointOf(method);
-      } else {
-        // Direct pointers were required but none were available.
-        VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
-      }
+      // Direct pointers were required but none were available.
+      VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
     }
   }
 }
@@ -1369,7 +1363,7 @@
     self->AssertNoPendingException();
     CHECK_GT(work_units, 0U);
 
-    index_ = begin;
+    index_.StoreRelaxed(begin);
     for (size_t i = 0; i < work_units; ++i) {
       thread_pool_->AddTask(self, new ForAllClosure(this, end, callback));
     }
@@ -1384,7 +1378,7 @@
   }
 
   size_t NextIndex() {
-    return index_.FetchAndAdd(1);
+    return index_.FetchAndAddSequentiallyConsistent(1);
   }
 
  private:
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f3db41f..abca659 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -688,7 +688,7 @@
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
 
-  UniquePtr<Compiler> compiler_;
+  std::unique_ptr<Compiler> compiler_;
 
   const InstructionSet instruction_set_;
   const InstructionSetFeatures instruction_set_features_;
@@ -712,13 +712,13 @@
   // If image_ is true, specifies the classes that will be included in
   // the image. Note if image_classes_ is NULL, all classes are
   // included in the image.
-  UniquePtr<DescriptorSet> image_classes_;
+  std::unique_ptr<DescriptorSet> image_classes_;
 
   size_t thread_count_;
   uint64_t start_ns_;
 
   class AOTCompilationStats;
-  UniquePtr<AOTCompilationStats> stats_;
+  std::unique_ptr<AOTCompilationStats> stats_;
 
   bool dump_stats_;
   const bool dump_passes_;
@@ -755,7 +755,7 @@
   bool support_boot_image_fixup_;
 
   // Call Frame Information, which might be generated to help stack tracebacks.
-  UniquePtr<std::vector<uint8_t> > cfi_info_;
+  std::unique_ptr<std::vector<uint8_t>> cfi_info_;
 
   // DeDuplication data structures, these own the corresponding byte arrays.
   class DedupeHashFunc {
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index fe3a4e6..4efd27d 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -18,8 +18,8 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
index 571a091..404e3f8 100644
--- a/compiler/elf_fixup.cc
+++ b/compiler/elf_fixup.cc
@@ -17,12 +17,12 @@
 #include "elf_fixup.h"
 
 #include <inttypes.h>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "elf_file.h"
 #include "elf_writer.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -30,7 +30,7 @@
 
 bool ElfFixup::Fixup(File* file, uintptr_t oat_data_begin) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
   CHECK(elf_file.get() != nullptr) << error_msg;
 
   // Lookup "oatdata" symbol address.
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
index b0fa63c..8c06c9f 100644
--- a/compiler/elf_stripper.cc
+++ b/compiler/elf_stripper.cc
@@ -18,9 +18,9 @@
 
 #include <unistd.h>
 #include <sys/types.h>
+#include <memory>
 #include <vector>
 
-#include "UniquePtrCompat.h"
 #include "base/logging.h"
 #include "elf_file.h"
 #include "elf_utils.h"
@@ -29,7 +29,7 @@
 namespace art {
 
 bool ElfStripper::Strip(File* file, std::string* error_msg) {
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
   if (elf_file.get() == nullptr) {
     return false;
   }
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index ccc26a1..4c093c7 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -42,7 +42,7 @@
                                      size_t& oat_loaded_size,
                                      size_t& oat_data_offset) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
   CHECK(elf_file.get() != NULL) << error_msg;
 
   oat_loaded_size = elf_file->GetLoadedSize();
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index aa4a5b2..0e27210 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -159,7 +159,7 @@
 void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) {
   // Add an artificial memory input. Based on LinkerTest.
   std::string error_msg;
-  UniquePtr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg));
+  std::unique_ptr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg));
   CHECK(oat_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg;
 
   const char* oat_data_start = reinterpret_cast<const char*>(&oat_file->GetOatHeader());
@@ -347,7 +347,7 @@
 
 void ElfWriterMclinker::FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg));
   CHECK(elf_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg;
 
   uint32_t oatdata_address = GetOatDataAddress(elf_file.get());
diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h
index 3c1a47b..955e5d2 100644
--- a/compiler/elf_writer_mclinker.h
+++ b/compiler/elf_writer_mclinker.h
@@ -17,9 +17,9 @@
 #ifndef ART_COMPILER_ELF_WRITER_MCLINKER_H_
 #define ART_COMPILER_ELF_WRITER_MCLINKER_H_
 
-#include "elf_writer.h"
+#include <memory>
 
-#include "UniquePtrCompat.h"
+#include "elf_writer.h"
 #include "safe_map.h"
 
 namespace mcld {
@@ -73,11 +73,11 @@
                                    const CompiledCode& compiled_code);
 
   // Setup by Init()
-  UniquePtr<mcld::LinkerConfig> linker_config_;
-  UniquePtr<mcld::LinkerScript> linker_script_;
-  UniquePtr<mcld::Module> module_;
-  UniquePtr<mcld::IRBuilder> ir_builder_;
-  UniquePtr<mcld::Linker> linker_;
+  std::unique_ptr<mcld::LinkerConfig> linker_config_;
+  std::unique_ptr<mcld::LinkerScript> linker_script_;
+  std::unique_ptr<mcld::Module> module_;
+  std::unique_ptr<mcld::IRBuilder> ir_builder_;
+  std::unique_ptr<mcld::Linker> linker_;
 
   // Setup by AddOatInput()
   // TODO: ownership of oat_input_?
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index 864dadc..e637cfb 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -44,18 +44,15 @@
   } while (false)
 
 TEST_F(ElfWriterTest, dlsym) {
-  std::string elf_filename;
+  std::string elf_location;
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != NULL);
-    elf_filename = StringPrintf("%s/framework/core.oat", host_dir);
+    elf_location = StringPrintf("%s/framework/core.oat", host_dir);
   } else {
-#ifdef __LP64__
-    elf_filename = "/data/art-test64/core.oat";
-#else
-    elf_filename = "/data/art-test/core.oat";
-#endif
+    elf_location = "/data/art-test/core.oat";
   }
+  std::string elf_filename = GetSystemImageFilename(elf_location.c_str(), kRuntimeISA);
   LOG(INFO) << "elf_filename=" << elf_filename;
 
   UnreserveImageSpace();
@@ -85,11 +82,11 @@
   }
 #endif
 
-  UniquePtr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
   ASSERT_TRUE(file.get() != NULL);
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", false);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", false);
@@ -97,7 +94,7 @@
   }
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", true);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", true);
@@ -105,7 +102,7 @@
   }
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     CHECK(ef->Load(false, &error_msg)) << error_msg;
     EXPECT_EQ(dl_oatdata, ef->FindDynamicSymbolAddress("oatdata"));
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 5a79542..92be147 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -16,6 +16,7 @@
 
 #include "image.h"
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -27,7 +28,6 @@
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "signal_catcher.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 #include "vector_output_stream.h"
 
@@ -42,9 +42,23 @@
 };
 
 TEST_F(ImageTest, WriteRead) {
-  // Create a root tmp file, to be the base of the .art and .oat temporary files.
-  ScratchFile tmp;
-  ScratchFile tmp_elf(tmp, "oat");
+  // Create a generic location tmp file, to be the base of the .art and .oat temporary files.
+  ScratchFile location;
+  ScratchFile image_location(location, ".art");
+
+  std::string image_filename(GetSystemImageFilename(image_location.GetFilename().c_str(),
+                                                    kRuntimeISA));
+  size_t pos = image_filename.rfind('/');
+  CHECK_NE(pos, std::string::npos) << image_filename;
+  std::string image_dir(image_filename, 0, pos);
+  int mkdir_result = mkdir(image_dir.c_str(), 0700);
+  CHECK_EQ(0, mkdir_result) << image_dir;
+  ScratchFile image_file(OS::CreateEmptyFile(image_filename.c_str()));
+
+  std::string oat_filename(image_filename, 0, image_filename.size() - 3);
+  oat_filename += "oat";
+  ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
+
   {
     {
       jobject class_loader = NULL;
@@ -68,28 +82,27 @@
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
                                                 &oat_writer,
-                                                tmp_elf.GetFile());
+                                                oat_file.GetFile());
       ASSERT_TRUE(success);
       timings.EndSplit();
     }
   }
-  // Workound bug that mcld::Linker::emit closes tmp_elf by reopening as tmp_oat.
-  UniquePtr<File> tmp_oat(OS::OpenFileReadWrite(tmp_elf.GetFilename().c_str()));
-  ASSERT_TRUE(tmp_oat.get() != NULL);
+  // Work around a bug where mcld::Linker::emit closes oat_file, by reopening it as dup_oat.
+  std::unique_ptr<File> dup_oat(OS::OpenFileReadWrite(oat_file.GetFilename().c_str()));
+  ASSERT_TRUE(dup_oat.get() != NULL);
 
-  ScratchFile tmp_image(tmp, "art");
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
   {
     ImageWriter writer(*compiler_driver_.get());
-    bool success_image = writer.Write(tmp_image.GetFilename(), requested_image_base,
-                                      tmp_oat->GetPath(), tmp_oat->GetPath());
+    bool success_image = writer.Write(image_file.GetFilename(), requested_image_base,
+                                      dup_oat->GetPath(), dup_oat->GetPath());
     ASSERT_TRUE(success_image);
-    bool success_fixup = ElfFixup::Fixup(tmp_oat.get(), writer.GetOatDataBegin());
+    bool success_fixup = ElfFixup::Fixup(dup_oat.get(), writer.GetOatDataBegin());
     ASSERT_TRUE(success_fixup);
   }
 
   {
-    UniquePtr<File> file(OS::OpenFileForReading(tmp_image.GetFilename().c_str()));
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
     ASSERT_TRUE(file.get() != NULL);
     ImageHeader image_header;
     file->ReadFully(&image_header, sizeof(image_header));
@@ -117,7 +130,7 @@
   java_lang_dex_file_ = NULL;
 
   std::string error_msg;
-  UniquePtr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(),
+  std::unique_ptr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(),
                                              GetLibCoreDexFileName().c_str(),
                                              &error_msg));
   ASSERT_TRUE(dex.get() != nullptr) << error_msg;
@@ -127,7 +140,7 @@
 
   Runtime::Options options;
   std::string image("-Ximage:");
-  image.append(tmp_image.GetFilename());
+  image.append(image_location.GetFilename());
   options.push_back(std::make_pair(image.c_str(), reinterpret_cast<void*>(NULL)));
 
   if (!Runtime::Create(options, false)) {
@@ -166,6 +179,11 @@
     }
     EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
   }
+
+  image_file.Unlink();
+  oat_file.Unlink();
+  int rmdir_result = rmdir(image_dir.c_str());
+  CHECK_EQ(0, rmdir_result);
 }
 
 TEST_F(ImageTest, ImageHeaderIsValid) {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index e261ee6..70144c8 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -18,6 +18,7 @@
 
 #include <sys/stat.h>
 
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -52,7 +53,6 @@
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 
 using ::art::mirror::ArtField;
@@ -77,7 +77,7 @@
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
-  UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
+  std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == NULL) {
     LOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
     return false;
@@ -141,7 +141,7 @@
   PatchOatCodeAndMethods();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
-  UniquePtr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
+  std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   if (image_file.get() == NULL) {
     LOG(ERROR) << "Failed to open image file " << image_filename;
@@ -418,7 +418,7 @@
   }
 
   // build an Object[] of the roots needed to restore the runtime
-  Handle<ObjectArray<Object> > image_roots(hs.NewHandle(
+  Handle<ObjectArray<Object>> image_roots(hs.NewHandle(
       ObjectArray<Object>::Alloc(self, object_array_class.Get(), ImageHeader::kImageRootsMax)));
   image_roots->Set<false>(ImageHeader::kResolutionMethod, runtime->GetResolutionMethod());
   image_roots->Set<false>(ImageHeader::kImtConflictMethod, runtime->GetImtConflictMethod());
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index f8df2bb..aff155a 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include <cstddef>
+#include <memory>
 #include <set>
 #include <string>
 
@@ -30,7 +31,6 @@
 #include "os.h"
 #include "safe_map.h"
 #include "gc/space/space.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -161,7 +161,7 @@
   OatFile* oat_file_;
 
   // Memory mapped for generating the image.
-  UniquePtr<MemMap> image_;
+  std::unique_ptr<MemMap> image_;
 
   // Offset to the free space in image_.
   size_t image_end_;
@@ -170,13 +170,13 @@
   byte* image_begin_;
 
   // Saved hashes (objects are inside of the image so that they don't move).
-  std::vector<std::pair<mirror::Object*, uint32_t> > saved_hashes_;
+  std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_;
 
   // Beginning target oat address for the pointers from the output image to its oat file.
   const byte* oat_data_begin_;
 
   // Image bitmap which lets us know where the objects inside of the image reside.
-  UniquePtr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
+  std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offset from oat_data_begin_ to the stubs.
   uint32_t interpreter_to_interpreter_bridge_offset_;
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 561d00f..9927fe1 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <memory>
+
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
@@ -31,7 +33,6 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 
 extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint count) {
   return count + 1;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 02d6fa5..7664a7f 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -15,6 +15,7 @@
  */
 
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -33,7 +34,6 @@
 #include "utils/mips/managed_register_mips.h"
 #include "utils/x86/managed_register_x86.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 
 #define __ jni_asm->
 
@@ -66,11 +66,11 @@
   }
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
-  UniquePtr<JniCallingConvention> main_jni_conv(
+  std::unique_ptr<JniCallingConvention> main_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
   bool reference_return = main_jni_conv->IsReturnAReference();
 
-  UniquePtr<ManagedRuntimeCallingConvention> mr_conv(
+  std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
       ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
@@ -86,11 +86,11 @@
     jni_end_shorty = "V";
   }
 
-  UniquePtr<JniCallingConvention> end_jni_conv(
+  std::unique_ptr<JniCallingConvention> end_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index df895ee..5990e8c 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -136,7 +136,7 @@
 
 CompiledMethod* CompilerLLVM::
 CompileDexMethod(DexCompilationUnit* dex_compilation_unit, InvokeType invoke_type) {
-  UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
+  std::unique_ptr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
   cunit->SetDexCompilationUnit(dex_compilation_unit);
   cunit->SetCompilerDriver(compiler_driver_);
@@ -163,9 +163,9 @@
 
 CompiledMethod* CompilerLLVM::
 CompileNativeMethod(DexCompilationUnit* dex_compilation_unit) {
-  UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
+  std::unique_ptr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
-  UniquePtr<JniCompiler> jni_compiler(
+  std::unique_ptr<JniCompiler> jni_compiler(
       new JniCompiler(cunit.get(), compiler_driver_, dex_compilation_unit));
 
   return jni_compiler->Compile();
diff --git a/compiler/llvm/compiler_llvm.h b/compiler/llvm/compiler_llvm.h
index c2211fb..cc74deb 100644
--- a/compiler/llvm/compiler_llvm.h
+++ b/compiler/llvm/compiler_llvm.h
@@ -17,18 +17,17 @@
 #ifndef ART_COMPILER_LLVM_COMPILER_LLVM_H_
 #define ART_COMPILER_LLVM_COMPILER_LLVM_H_
 
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include "base/macros.h"
 #include "dex_file.h"
 #include "driver/compiler_driver.h"
 #include "instruction_set.h"
 #include "mirror/object.h"
 
-#include <UniquePtr.h>
-
-#include <string>
-#include <utility>
-#include <vector>
-
 namespace art {
   class CompiledMethod;
   class CompilerDriver;
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index cf28db3..25c9b20 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -141,7 +141,7 @@
 
   std::vector<llvm::BasicBlock*> basic_block_landing_pads_;
   llvm::BasicBlock* current_bb_;
-  std::map<llvm::BasicBlock*, std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*> > >
+  std::map<llvm::BasicBlock*, std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*>>>
       landing_pad_phi_mapping_;
   llvm::BasicBlock* basic_block_unwind_;
 
@@ -545,7 +545,7 @@
     }
 
     llvm::TerminatorInst* term_inst = lbb->getTerminator();
-    std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*> >& rewrite_pair
+    std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*>>& rewrite_pair
         = landing_pad_phi_mapping_[lbb];
     irb_.SetInsertPoint(lbb->begin());
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 78bdb4d..741c2d7 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -152,7 +152,7 @@
   std::string bitcode;
   DumpBitcodeToString(bitcode);
   std::string filename(StringPrintf("%s/Art%zu.bc", DumpDirectory().c_str(), cunit_id_));
-  UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
+  std::unique_ptr<File> output(OS::CreateEmptyFile(filename.c_str()));
   output->WriteFully(bitcode.data(), bitcode.size());
   LOG(INFO) << ".bc file written successfully: " << filename;
 }
@@ -179,7 +179,7 @@
   if (kDumpELF) {
     // Dump the ELF image for debugging
     std::string filename(StringPrintf("%s/Art%zu.o", DumpDirectory().c_str(), cunit_id_));
-    UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
+    std::unique_ptr<File> output(OS::CreateEmptyFile(filename.c_str()));
     output->WriteFully(elf_object_.data(), elf_object_.size());
     LOG(INFO) << ".o file written successfully: " << filename;
   }
diff --git a/compiler/llvm/llvm_compilation_unit.h b/compiler/llvm/llvm_compilation_unit.h
index 58aa6fd..f11fb6e 100644
--- a/compiler/llvm/llvm_compilation_unit.h
+++ b/compiler/llvm/llvm_compilation_unit.h
@@ -17,6 +17,10 @@
 #ifndef ART_COMPILER_LLVM_LLVM_COMPILATION_UNIT_H_
 #define ART_COMPILER_LLVM_LLVM_COMPILATION_UNIT_H_
 
+#include <memory>
+#include <string>
+#include <vector>
+
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "dex/compiler_internals.h"
@@ -28,10 +32,6 @@
 #include "runtime_support_llvm_func.h"
 #include "safe_map.h"
 
-#include <UniquePtr.h>
-#include <string>
-#include <vector>
-
 namespace art {
   class CompiledMethod;
 }
@@ -106,12 +106,12 @@
   const CompilerLLVM* compiler_llvm_;
   const size_t cunit_id_;
 
-  UniquePtr< ::llvm::LLVMContext> context_;
-  UniquePtr<IRBuilder> irb_;
-  UniquePtr<RuntimeSupportBuilder> runtime_support_;
+  std::unique_ptr< ::llvm::LLVMContext> context_;
+  std::unique_ptr<IRBuilder> irb_;
+  std::unique_ptr<RuntimeSupportBuilder> runtime_support_;
   ::llvm::Module* module_;  // Managed by context_
-  UniquePtr<IntrinsicHelper> intrinsic_helper_;
-  UniquePtr<LLVMInfo> llvm_info_;
+  std::unique_ptr<IntrinsicHelper> intrinsic_helper_;
+  std::unique_ptr<LLVMInfo> llvm_info_;
   CompilerDriver* driver_;
   DexCompilationUnit* dex_compilation_unit_;
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index ce35d0f..a7ee82e 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -128,7 +128,7 @@
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
   }
   std::string error_msg;
-  UniquePtr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false,
                                             &error_msg));
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7a41d87..8c20aa8 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -18,15 +18,14 @@
 #define ART_COMPILER_OAT_WRITER_H_
 
 #include <stdint.h>
-
 #include <cstddef>
+#include <memory>
 
 #include "driver/compiler_driver.h"
 #include "mem_map.h"
 #include "oat.h"
 #include "mirror/class.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -256,16 +255,16 @@
   OatHeader* oat_header_;
   std::vector<OatDexFile*> oat_dex_files_;
   std::vector<OatClass*> oat_classes_;
-  UniquePtr<const std::vector<uint8_t> > interpreter_to_interpreter_bridge_;
-  UniquePtr<const std::vector<uint8_t> > interpreter_to_compiled_code_bridge_;
-  UniquePtr<const std::vector<uint8_t> > jni_dlsym_lookup_;
-  UniquePtr<const std::vector<uint8_t> > portable_imt_conflict_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > portable_resolution_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > portable_to_interpreter_bridge_;
-  UniquePtr<const std::vector<uint8_t> > quick_generic_jni_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_imt_conflict_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_resolution_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_compiled_code_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_imt_conflict_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_resolution_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_resolution_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_to_interpreter_bridge_;
 
   // output stats
   uint32_t size_dex_file_alignment_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2c2564d..521992a 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -499,6 +499,7 @@
       break;
     }
 
+    case Instruction::MOVE_RESULT:
     case Instruction::MOVE_RESULT_WIDE: {
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d40990e..7684bb1 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -42,7 +42,7 @@
 
  private:
   size_t size_;
-  UniquePtr<uint8_t[]> memory_;
+  std::unique_ptr<uint8_t[]> memory_;
 
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index b9c1164..52e3e37 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -18,6 +18,7 @@
 
 #include "driver/dex_compilation_unit.h"
 #include "nodes.h"
+#include "ssa_liveness_analysis.h"
 
 namespace art {
 
@@ -102,6 +103,24 @@
       }
       output_ << "]";
     }
+    if (instruction->GetLifetimePosition() != kNoLifetime) {
+      output_ << " (liveness: " << instruction->GetLifetimePosition();
+      if (instruction->HasLiveInterval()) {
+        output_ << " ";
+        const GrowableArray<LiveRange>& ranges = instruction->GetLiveInterval()->GetRanges();
+        size_t i = ranges.Size() - 1;
+        do {
+          output_ << "[" << ranges.Get(i).GetStart() << "," << ranges.Get(i).GetEnd() << "[";
+          if (i == 0) {
+            break;
+          } else {
+            --i;
+            output_ << ",";
+          }
+        } while (true);
+      }
+      output_ << ")";
+    }
   }
 
   void PrintInstructions(const HInstructionList& list) {
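
The do-while above walks the ranges from the last index down to the first and prints each as a half-open interval in the [start,end[ convention, comma-separated. The same traversal over a plain vector (Range is a stand-in for ART's LiveRange):

  #include <cstdio>
  #include <vector>
  struct Range { size_t start; size_t end; };
  void PrintRangesLastFirst(const std::vector<Range>& ranges) {
    for (size_t i = ranges.size(); i-- > 0;) {  // reverse walk without underflow
      std::printf("[%zu,%zu[%s", ranges[i].start, ranges[i].end, i != 0 ? "," : "");
    }
  }
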
@@ -126,8 +145,14 @@
   void VisitBasicBlock(HBasicBlock* block) {
     StartTag("block");
     PrintProperty("name", "B", block->GetBlockId());
-    PrintInt("from_bci", -1);
-    PrintInt("to_bci", -1);
+    if (block->GetLifetimeStart() != kNoLifetime) {
+      // Piggy back on these fields to show the lifetime of the block.
+      PrintInt("from_bci", block->GetLifetimeStart());
+      PrintInt("to_bci", block->GetLifetimeEnd());
+    } else {
+      PrintInt("from_bci", -1);
+      PrintInt("to_bci", -1);
+    }
     PrintPredecessors(block);
     PrintSuccessors(block);
     PrintEmptyProperty("xhandlers");
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
new file mode 100644
index 0000000..9849388
--- /dev/null
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
+  HGraphBuilder builder(allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  graph->FindNaturalLoops();
+  return graph;
+}
+
+TEST(LiveRangesTest, CFG1) {
+  /*
+   * Test the following snippet:
+   *  return 0;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: goto
+   *           |
+   *       6: return
+   *           |
+   *       9: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the return instruction.
+  ASSERT_EQ(6u, range.GetEnd());
+  HBasicBlock* block = graph->GetBlocks().Get(1);
+  ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
+  ASSERT_EQ(6u, block->GetLastInstruction()->GetLifetimePosition());
+}
+
+TEST(LiveRangesTest, CFG2) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  if (0 == 0) {
+   *  } else {
+   *  }
+   *  return a;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: goto
+   *           |
+   *       6: equal
+   *       7: if
+   *       /       \
+   *   10: goto   13: goto
+   *       \       /
+   *       16: return
+   *         |
+   *       19: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the return instruction.
+  ASSERT_EQ(16u, range.GetEnd());
+  HBasicBlock* block = graph->GetBlocks().Get(3);
+  ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
+  ASSERT_EQ(16u, block->GetLastInstruction()->GetLifetimePosition());
+}
+
+TEST(LiveRangesTest, CFG3) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  if (0 == 0) {
+   *  } else {
+   *    a = 4;
+   *  }
+   *  return a;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: constant4
+   *       4: goto
+   *           |
+   *       7: equal
+   *       8: if
+   *       /       \
+   *   11: goto   14: goto
+   *       \       /
+   *       16: phi
+   *       17: return
+   *         |
+   *       20: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  // Test for the 0 constant.
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the phi at the return block, so the instruction is live until
+  // the end of the then block.
+  ASSERT_EQ(12u, range.GetEnd());
+
+  // Test for the 4 constant.
+  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  // The then branch is a hole for this constant, so its interval has 2 ranges.
+  ASSERT_EQ(2u, interval->GetRanges().Size());
+  // First range is the else block.
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(13u, range.GetStart());
+  // Last use is the phi at the return block.
+  ASSERT_EQ(15u, range.GetEnd());
+  // Second range starts from the definition and ends at the if block.
+  range = interval->GetRanges().Get(1);
+  ASSERT_EQ(3u, range.GetStart());
+  // 9 is the end of the if block.
+  ASSERT_EQ(9u, range.GetEnd());
+
+  // Test for the phi.
+  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(16u, range.GetStart());
+  ASSERT_EQ(17u, range.GetEnd());
+}
+
+TEST(LiveRangesTest, Loop) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  while (a == a) {
+   *    a = 4;
+   *  }
+   *  return 5;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: constant4
+   *       4: constant5
+   *       5: goto
+   *           |
+   *       8: goto
+   *           |
+   *       10: phi
+   *       11: equal
+   *       12: if +++++
+   *        |       \ +
+   *        |     15: goto
+   *        |
+   *       18: return
+   *         |
+   *       21: exit
+   */
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::CONST_4 | 5 << 12 | 1 << 8,
+    Instruction::RETURN | 1 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  // Test for the 0 constant.
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the loop phi, so the instruction is live until
+  // the end of the loop pre-header.
+  ASSERT_EQ(9u, range.GetEnd());
+
+  // Test for the 4 constant.
+  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  // The instruction is live until the end of the loop.
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(3u, range.GetStart());
+  ASSERT_EQ(16u, range.GetEnd());
+
+  // Test for the 5 constant.
+  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  // The instruction is live until the return instruction after the loop.
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(4u, range.GetStart());
+  ASSERT_EQ(18u, range.GetEnd());
+
+  // Test for the phi.
+  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  // Last use is the equal, which feeds the if.
+  ASSERT_EQ(10u, range.GetStart());
+  ASSERT_EQ(11u, range.GetEnd());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 1085c10..a2cb1c4 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -29,6 +29,7 @@
 class HIntConstant;
 class HGraphVisitor;
 class HPhi;
+class LiveInterval;
 class LocationSummary;
 
 static const int kDefaultNumberOfBlocks = 8;
@@ -223,6 +224,8 @@
   DISALLOW_COPY_AND_ASSIGN(HLoopInformation);
 };
 
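+// Sentinel lifetime position; the -1 wraps around to the maximum size_t value.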
+static constexpr size_t kNoLifetime = -1;
+
 // A block in a method. Contains the list of instructions represented
 // as a double linked list. Each block knows its predecessors and
 // successors.
@@ -234,7 +237,9 @@
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
         loop_information_(nullptr),
         dominator_(nullptr),
-        block_id_(-1) { }
+        block_id_(-1),
+        lifetime_start_(kNoLifetime),
+        lifetime_end_(kNoLifetime) {}
 
   const GrowableArray<HBasicBlock*>& GetPredecessors() const {
     return predecessors_;
@@ -299,6 +304,15 @@
     block->successors_.Add(this);
   }
 
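+  // Returns the index of the given block in the predecessor list, or
+  // static_cast<size_t>(-1) if it is not a predecessor.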
+  size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
+    for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
+      if (predecessors_.Get(i) == predecessor) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
   void AddPhi(HPhi* phi);
@@ -334,6 +348,12 @@
   // Returns whether this block dominates the block passed as parameter.
   bool Dominates(HBasicBlock* block) const;
 
+  size_t GetLifetimeStart() const { return lifetime_start_; }
+  size_t GetLifetimeEnd() const { return lifetime_end_; }
+
+  void SetLifetimeStart(size_t start) { lifetime_start_ = start; }
+  void SetLifetimeEnd(size_t end) { lifetime_end_ = end; }
+
  private:
   HGraph* const graph_;
   GrowableArray<HBasicBlock*> predecessors_;
@@ -343,6 +363,8 @@
   HLoopInformation* loop_information_;
   HBasicBlock* dominator_;
   int block_id_;
+  size_t lifetime_start_;
+  size_t lifetime_end_;
 
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
 };
@@ -407,7 +429,9 @@
         uses_(nullptr),
         env_uses_(nullptr),
         environment_(nullptr),
-        locations_(nullptr) { }
+        locations_(nullptr),
+        live_interval_(nullptr),
+        lifetime_position_(kNoLifetime) {}
 
   virtual ~HInstruction() { }
 
@@ -477,6 +501,12 @@
   FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
+  size_t GetLifetimePosition() const { return lifetime_position_; }
+  void SetLifetimePosition(size_t position) { lifetime_position_ = position; }
+  LiveInterval* GetLiveInterval() const { return live_interval_; }
+  void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; }
+  bool HasLiveInterval() const { return live_interval_ != nullptr; }
+
  private:
   HInstruction* previous_;
   HInstruction* next_;
@@ -501,6 +531,13 @@
   // Set by the code generator.
   LocationSummary* locations_;
 
+  // Set by the liveness analysis.
+  LiveInterval* live_interval_;
+
+  // Set by the liveness analysis, this is the position in the linear
+  // order of blocks where this instruction's live interval starts.
+  size_t lifetime_position_;
+
   friend class HBasicBlock;
   friend class HInstructionList;
 
@@ -596,6 +633,8 @@
  private:
   HInstruction* instruction_;
   HInstruction* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionIterator);
 };
 
 class HBackwardInstructionIterator : public ValueObject {
@@ -615,6 +654,8 @@
  private:
   HInstruction* instruction_;
   HInstruction* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBackwardInstructionIterator);
 };
 
 // An embedded container with N elements of type T.  Used (with partial
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f435cb0..286f48a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -129,6 +129,7 @@
 
   graph->FindNaturalLoops();
   SsaLivenessAnalysis(*graph).Analyze();
+  visualizer.DumpGraph("liveness");
 
   return new CompiledMethod(GetCompilerDriver(),
                             instruction_set,
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 85171aa..0f16ad2 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -22,7 +22,7 @@
 void SsaLivenessAnalysis::Analyze() {
   LinearizeGraph();
   NumberInstructions();
-  ComputeSets();
+  ComputeLiveness();
 }
 
 static bool IsLoopExit(HLoopInformation* current, HLoopInformation* to) {
@@ -96,6 +96,22 @@
   DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
 };
 
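+// Visits blocks in post linear order, i.e. the reverse of the linear order
+// used by NumberInstructions, since liveness is computed backwards.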
+class HLinearPostOrderIterator : public ValueObject {
+ public:
+  explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
+      : post_order_(post_order), index_(0) {}
+
+  bool Done() const { return index_ == post_order_.Size(); }
+  HBasicBlock* Current() const { return post_order_.Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
+};
+
 void SsaLivenessAnalysis::LinearizeGraph() {
   // For simplicity of the implementation, we create a post linear order. The order for
   // computing live ranges is the reverse of that order.
@@ -105,27 +121,41 @@
 
 void SsaLivenessAnalysis::NumberInstructions() {
   int ssa_index = 0;
+  size_t lifetime_position = 0;
+  // Each instruction gets an individual lifetime position, and a block gets a lifetime
+  // start and end position. Non-phi instructions have a lifetime position distinct from
+  // the block they are in. Phi instructions have the lifetime start of their block as
+  // their lifetime position.
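+  // For example, a first block containing two instructions gets lifetime start
+  // 1, its instructions positions 2 and 3, and lifetime end 4; the next block
+  // then starts at position 5.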
   for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
+    block->SetLifetimeStart(++lifetime_position);
 
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       if (current->HasUses()) {
+        instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
+        current->SetLiveInterval(new (graph_.GetArena()) LiveInterval(graph_.GetArena()));
       }
+      current->SetLifetimePosition(lifetime_position);
     }
 
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       if (current->HasUses()) {
+        instructions_from_ssa_index_.Add(current);
         current->SetSsaIndex(ssa_index++);
+        current->SetLiveInterval(new (graph_.GetArena()) LiveInterval(graph_.GetArena()));
       }
+      current->SetLifetimePosition(++lifetime_position);
     }
+
+    block->SetLifetimeEnd(++lifetime_position);
   }
   number_of_ssa_values_ = ssa_index;
 }
 
-void SsaLivenessAnalysis::ComputeSets() {
+void SsaLivenessAnalysis::ComputeLiveness() {
   for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block_infos_.Put(
@@ -133,9 +163,10 @@
         new (graph_.GetArena()) BlockInfo(graph_.GetArena(), *block, number_of_ssa_values_));
   }
 
-  // Compute the initial live_in, live_out, and kill sets. This method does not handle
-  // backward branches, therefore live_in and live_out sets are not yet correct.
-  ComputeInitialSets();
+  // Compute the live ranges, as well as the initial live_in, live_out, and kill sets.
+  // This method does not handle backward branches for the sets, therefore live_in
+  // and live_out sets are not yet correct.
+  ComputeLiveRanges();
 
   // Do a fixed point calculation to take into account backward branches,
   // that will update live_in of loop headers, and therefore live_out and live_in
@@ -143,26 +174,71 @@
   ComputeLiveInAndLiveOutSets();
 }
 
-void SsaLivenessAnalysis::ComputeInitialSets() {
-  // Do a post orderr visit, adding inputs of instructions live in the block where
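+// Iterates over the instructions whose bit is set in the given bit vector,
+// mapping each set bit (an SSA index) back to its instruction.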
+class InstructionBitVectorIterator : public ValueObject {
+ public:
+  InstructionBitVectorIterator(const BitVector& vector,
+                               const GrowableArray<HInstruction*>& instructions)
+        : instructions_(instructions),
+          iterator_(BitVector::Iterator(&vector)),
+          current_bit_index_(iterator_.Next()) {}
+
+  bool Done() const { return current_bit_index_ == -1; }
+  HInstruction* Current() const { return instructions_.Get(current_bit_index_); }
+  void Advance() {
+    current_bit_index_ = iterator_.Next();
+  }
+
+ private:
+  const GrowableArray<HInstruction*>& instructions_;
+  BitVector::Iterator iterator_;
+  int32_t current_bit_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionBitVectorIterator);
+};
+
+void SsaLivenessAnalysis::ComputeLiveRanges() {
+  // Do a post order visit, adding the inputs of each instruction as live in the block where
   // that instruction is defined, and killing instructions that are being visited.
-  for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+  for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
 
     BitVector* kill = GetKillSet(*block);
     BitVector* live_in = GetLiveInSet(*block);
 
+    // Mark the phi inputs in this block's successors that correspond to this
+    // block as live_in.
+    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+      HBasicBlock* successor = block->GetSuccessors().Get(i);
+      live_in->Union(GetLiveInSet(*successor));
+      size_t phi_input_index = successor->GetPredecessorIndexOf(block);
+      for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current()->InputAt(phi_input_index);
+        live_in->SetBit(input->GetSsaIndex());
+      }
+    }
+
+    // Add a range covering this block to every instruction that is live_in because of successors.
+    for (InstructionBitVectorIterator it(*live_in, instructions_from_ssa_index_);
+         !it.Done();
+         it.Advance()) {
+      it.Current()->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd());
+    }
+
     for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       if (current->HasSsaIndex()) {
+        // Kill the instruction and shorten its interval.
         kill->SetBit(current->GetSsaIndex());
         live_in->ClearBit(current->GetSsaIndex());
+        current->GetLiveInterval()->SetFrom(current->GetLifetimePosition());
       }
 
       // All inputs of an instruction must be live.
       for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-        DCHECK(current->InputAt(i)->HasSsaIndex());
-        live_in->SetBit(current->InputAt(i)->GetSsaIndex());
+        HInstruction* input = current->InputAt(i);
+        DCHECK(input->HasSsaIndex());
+        live_in->SetBit(input->GetSsaIndex());
+        input->GetLiveInterval()->AddUse(current);
       }
 
       if (current->HasEnvironment()) {
@@ -173,32 +249,30 @@
           if (instruction != nullptr) {
             DCHECK(instruction->HasSsaIndex());
             live_in->SetBit(instruction->GetSsaIndex());
+            instruction->GetLiveInterval()->AddUse(current);
           }
         }
       }
     }
 
+    // Kill phis defined in this block.
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       if (current->HasSsaIndex()) {
         kill->SetBit(current->GetSsaIndex());
         live_in->ClearBit(current->GetSsaIndex());
       }
+    }
 
-      // Mark a phi input live_in for its corresponding predecessor.
-      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-        HInstruction* input = current->InputAt(i);
-
-        HBasicBlock* predecessor = block->GetPredecessors().Get(i);
-        size_t ssa_index = input->GetSsaIndex();
-        BitVector* predecessor_kill = GetKillSet(*predecessor);
-        BitVector* predecessor_live_in = GetLiveInSet(*predecessor);
-
-        // Phi inputs from a back edge have already been visited. If the back edge
-        // block defines that input, we should not add it to its live_in.
-        if (!predecessor_kill->IsBitSet(ssa_index)) {
-          predecessor_live_in->SetBit(ssa_index);
-        }
+    if (block->IsLoopHeader()) {
+      HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+      // For all live_in instructions at the loop header, we need to create a range
+      // that covers the full loop.
+      for (InstructionBitVectorIterator it(*live_in, instructions_from_ssa_index_);
+           !it.Done();
+           it.Advance()) {
+        it.Current()->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
+                                                      back_edge->GetLifetimeEnd());
       }
     }
   }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b8695ba..2d91436 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -44,12 +44,104 @@
   DISALLOW_COPY_AND_ASSIGN(BlockInfo);
 };
 
+/**
+ * A live range is an interval [start, end[ of lifetime positions during
+ * which an instruction is live.
+ */
+class LiveRange : public ValueObject {
+ public:
+  LiveRange(size_t start, size_t end) : start_(start), end_(end) {
+    DCHECK_LT(start, end);
+  }
+
+  size_t GetStart() const { return start_; }
+  size_t GetEnd() const { return end_; }
+
+ private:
+  size_t start_;
+  size_t end_;
+};
+
+static constexpr int kDefaultNumberOfRanges = 3;
+
+/**
+ * An interval is a list of disjoint live ranges where an instruction is live.
+ * Each instruction that has uses gets an interval.
+ */
+class LiveInterval : public ArenaObject {
+ public:
+  explicit LiveInterval(ArenaAllocator* allocator) : ranges_(allocator, kDefaultNumberOfRanges) {}
+
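+  // Note: liveness is computed backwards, block by block and instruction by
+  // instruction, so ranges_.Peek(), the last range added, is always the
+  // earliest range seen so far.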
+  void AddUse(HInstruction* instruction) {
+    size_t position = instruction->GetLifetimePosition();
+    size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
+    size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd();
+    if (ranges_.IsEmpty()) {
+      // First time we see a use of that interval.
+      ranges_.Add(LiveRange(start_block_position, position));
+    } else if (ranges_.Peek().GetStart() == start_block_position) {
+      // There is a use later in the same block.
+      DCHECK_LE(position, ranges_.Peek().GetEnd());
+    } else if (ranges_.Peek().GetStart() == end_block_position + 1) {
+      // Last use is in a following block.
+      LiveRange existing = ranges_.Pop();
+      ranges_.Add(LiveRange(start_block_position, existing.GetEnd()));
+    } else {
+      // There is a hole in the interval. Create a new range.
+      ranges_.Add(LiveRange(start_block_position, position));
+    }
+  }
+
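+  // Adds the range [start, end[, merging it with the last added range when
+  // the two are contiguous.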
+  void AddRange(size_t start, size_t end) {
+    if (ranges_.IsEmpty()) {
+      ranges_.Add(LiveRange(start, end));
+    } else if (ranges_.Peek().GetStart() == end + 1) {
+      // There is a use in the following block.
+      LiveRange existing = ranges_.Pop();
+      ranges_.Add(LiveRange(start, existing.GetEnd()));
+    } else {
+      // There is a hole in the interval. Create a new range.
+      ranges_.Add(LiveRange(start, end));
+    }
+  }
+
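+  // Makes this interval cover the whole loop [start, end[: ranges ending inside
+  // the loop are subsumed, and a range extending past the loop keeps its end.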
+  void AddLoopRange(size_t start, size_t end) {
+    DCHECK(!ranges_.IsEmpty());
+    while (!ranges_.IsEmpty() && ranges_.Peek().GetEnd() < end) {
+      DCHECK_LE(start, ranges_.Peek().GetStart());
+      ranges_.Pop();
+    }
+    if (ranges_.IsEmpty()) {
+      // Uses are only in the loop.
+      ranges_.Add(LiveRange(start, end));
+    } else {
+      // There are uses after the loop.
+      LiveRange range = ranges_.Pop();
+      ranges_.Add(LiveRange(start, range.GetEnd()));
+    }
+  }
+
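+  // Shortens the earliest range so that it starts at the defining
+  // instruction's position.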
+  void SetFrom(size_t from) {
+    DCHECK(!ranges_.IsEmpty());
+    LiveRange existing = ranges_.Pop();
+    ranges_.Add(LiveRange(from, existing.GetEnd()));
+  }
+
+  const GrowableArray<LiveRange>& GetRanges() const { return ranges_; }
+
+ private:
+  GrowableArray<LiveRange> ranges_;
+
+  DISALLOW_COPY_AND_ASSIGN(LiveInterval);
+};
+
 class SsaLivenessAnalysis : public ValueObject {
  public:
   explicit SsaLivenessAnalysis(const HGraph& graph)
       : graph_(graph),
         linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
         block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
+        instructions_from_ssa_index_(graph.GetArena(), 0),
         number_of_ssa_values_(0) {
     block_infos_.SetSize(graph.GetBlocks().Size());
   }
@@ -72,6 +164,10 @@
     return linear_post_order_;
   }
 
+  HInstruction* GetInstructionFromSsaIndex(size_t index) {
+    return instructions_from_ssa_index_.Get(index);
+  }
+
  private:
   // Linearize the graph so that:
   // (1): a block is always after its dominator,
@@ -79,15 +175,16 @@
   // This creates a natural and efficient ordering when visualizing live ranges.
   void LinearizeGraph();
 
-  // Give an SSA number to each instruction that defines a value used by another instruction.
+  // Give an SSA number to each instruction that defines a value used by another instruction,
+  // and set up the lifetime information of each instruction and block.
   void NumberInstructions();
 
-  // Compute live_in, live_out and kill sets.
-  void ComputeSets();
+  // Compute live ranges of instructions, as well as live_in, live_out and kill sets.
+  void ComputeLiveness();
 
-  // Compute the initial live_in, live_out and kill sets, without analyzing
-  // backward branches.
-  void ComputeInitialSets();
+  // Compute the live ranges of instructions, as well as the initial live_in, live_out and
+  // kill sets, which do not yet take backward branches into account.
+  void ComputeLiveRanges();
 
   // After computing the initial sets, this method does a fixed point
   // calculation over the live_in and live_out set to take into account
@@ -103,6 +200,7 @@
   const HGraph& graph_;
   GrowableArray<HBasicBlock*> linear_post_order_;
   GrowableArray<BlockInfo*> block_infos_;
+  GrowableArray<HInstruction*> instructions_from_ssa_index_;
   size_t number_of_ssa_values_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
index 290bf25..5fa0ccb 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/output_stream_test.cc
@@ -64,7 +64,7 @@
   FileOutputStream output_stream(tmp.GetFile());
   SetOutputStream(output_stream);
   GenerateTestOutput();
-  UniquePtr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
   EXPECT_TRUE(in.get() != NULL);
   std::vector<uint8_t> actual(in->GetLength());
   bool readSuccess = in->ReadFully(&actual[0], actual.size());
@@ -74,12 +74,12 @@
 
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
-  UniquePtr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
+  std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
   CHECK(file_output_stream.get() != NULL);
   BufferedOutputStream buffered_output_stream(file_output_stream.release());
   SetOutputStream(buffered_output_stream);
   GenerateTestOutput();
-  UniquePtr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
   EXPECT_TRUE(in.get() != NULL);
   std::vector<uint8_t> actual(in->GetLength());
   bool readSuccess = in->ReadFully(&actual[0], actual.size());
diff --git a/compiler/sea_ir/debug/dot_gen.h b/compiler/sea_ir/debug/dot_gen.h
index d7d21ad..a5d6819 100644
--- a/compiler/sea_ir/debug/dot_gen.h
+++ b/compiler/sea_ir/debug/dot_gen.h
@@ -104,7 +104,7 @@
     LOG(INFO) << "Starting to write SEA string to file " << filename << std::endl;
     DotGenerationVisitor dgv = DotGenerationVisitor(&options_, types);
     graph->Accept(&dgv);
-    // TODO: UniquePtr to close file properly. Switch to BufferedOutputStream.
+    // TODO: std::unique_ptr to close file properly. Switch to BufferedOutputStream.
     art::File* file = art::OS::CreateEmptyFile(filename.c_str());
     art::FileOutputStream fos(file);
     std::string graph_as_string = dgv.GetResult();
diff --git a/compiler/sea_ir/ir/sea.cc b/compiler/sea_ir/ir/sea.cc
index 0734b21..2b25f56 100644
--- a/compiler/sea_ir/ir/sea.cc
+++ b/compiler/sea_ir/ir/sea.cc
@@ -289,7 +289,7 @@
 void SeaGraph::ConvertToSSA() {
   // Pass: find global names.
   // The map @block maps registers to the blocks in which they are defined.
-  std::map<int, std::set<Region*> > blocks;
+  std::map<int, std::set<Region*>> blocks;
   // The set @globals records registers whose use
   // is in a different block than the corresponding definition.
   std::set<int> globals;
@@ -311,7 +311,7 @@
         var_kill.insert(reg_def);
       }
 
-      blocks.insert(std::pair<int, std::set<Region*> >(reg_def, std::set<Region*>()));
+      blocks.insert(std::pair<int, std::set<Region*>>(reg_def, std::set<Region*>()));
       std::set<Region*>* reg_def_blocks = &(blocks.find(reg_def)->second);
       reg_def_blocks->insert(*region_it);
     }
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index d03b99f..24378b4 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,7 +30,7 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  UniquePtr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
+  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
@@ -47,7 +47,7 @@
   __ bkpt(0);
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -58,7 +58,7 @@
 namespace arm64 {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<8> offset) {
-  UniquePtr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
+  std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
@@ -84,7 +84,7 @@
   }
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -95,7 +95,7 @@
 namespace mips {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  UniquePtr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips)));
+  std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -114,7 +114,7 @@
   __ Break();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -124,14 +124,14 @@
 
 namespace x86 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
-  UniquePtr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
+  std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
 
   // All x86 trampolines call via the Thread* held in fs.
   __ fs()->jmp(Address::Absolute(offset));
   __ int3();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -141,7 +141,7 @@
 
 namespace x86_64 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
-  UniquePtr<x86_64::X86_64Assembler>
+  std::unique_ptr<x86_64::X86_64Assembler>
       assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64)));
 
   // All x86 trampolines call via the Thread* held in gs.
@@ -149,7 +149,7 @@
   __ int3();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 0f4a9a4..ab4999a 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -17,8 +17,9 @@
 #ifndef ART_COMPILER_UTILS_ARM64_ASSEMBLER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_ASSEMBLER_ARM64_H_
 
-#include <vector>
 #include <stdint.h>
+#include <memory>
+#include <vector>
 
 #include "base/logging.h"
 #include "constants_arm64.h"
@@ -26,7 +27,6 @@
 #include "utils/assembler.h"
 #include "offsets.h"
 #include "utils.h"
-#include "UniquePtrCompat.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/disasm-a64.h"
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 1b050cf..754496b 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -347,7 +347,7 @@
     }
 
     size_t cs = assembler_->CodeSize();
-    UniquePtr<std::vector<uint8_t> > data(new std::vector<uint8_t>(cs));
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
 
@@ -375,7 +375,7 @@
     bool ok;
     std::string error_msg;
     std::string base_name;
-    UniquePtr<std::vector<uint8_t>> code;
+    std::unique_ptr<std::vector<uint8_t>> code;
     uintptr_t length;
   };
 
@@ -681,7 +681,7 @@
     return tmpnam_;
   }
 
-  UniquePtr<Ass> assembler_;
+  std::unique_ptr<Ass> assembler_;
 
   std::string resolved_assembler_cmd_;
   std::string resolved_objdump_cmd_;
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 7cc253c..4c52174 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -77,7 +77,7 @@
 
  private:
   std::string lock_name_[kShard];
-  UniquePtr<Mutex> lock_[kShard];
+  std::unique_ptr<Mutex> lock_[kShard];
   std::set<HashedKey, Comparator> keys_[kShard];
 
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h
index c6fefde..5deb661 100644
--- a/compiler/utils/scoped_arena_containers.h
+++ b/compiler/utils/scoped_arena_containers.h
@@ -26,14 +26,14 @@
 namespace art {
 
 template <typename T>
-using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T> >;
+using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
 
-template <typename T, typename Comparator = std::less<T> >
-using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T> >;
+template <typename T, typename Comparator = std::less<T>>
+using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>;
 
-template <typename K, typename V, typename Comparator = std::less<K> >
+template <typename K, typename V, typename Comparator = std::less<K>>
 using ScopedArenaSafeMap =
-    SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V> > >;
+    SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>;
 
 }  // namespace art
 
diff --git a/compiler/utils/scoped_hashtable.h b/compiler/utils/scoped_hashtable.h
index ccec7ba..bf8dd1f 100644
--- a/compiler/utils/scoped_hashtable.h
+++ b/compiler/utils/scoped_hashtable.h
@@ -36,7 +36,7 @@
   // Lookups entry K starting from the current (topmost) scope
   // and returns its value if found or NULL.
   V Lookup(K k) const {
-    for (typename std::list<std::map<K, V> >::const_iterator scopes_it = scopes.begin();
+    for (typename std::list<std::map<K, V>>::const_iterator scopes_it = scopes.begin();
         scopes_it != scopes.end(); scopes_it++) {
       typename std::map<K, V>::const_iterator result_it = (*scopes_it).find(k);
       if (result_it != (*scopes_it).end()) {
@@ -64,7 +64,7 @@
   }
 
  private:
-  std::list<std::map<K, V> > scopes;
+  std::list<std::map<K, V>> scopes;
 };
 }  // namespace utils
 
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index e7ed9a7..0ded2d8 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -30,7 +30,7 @@
 LOCAL_MULTILIB := both
 LOCAL_MODULE_STEM_32 := dalvikvm32
 LOCAL_MODULE_STEM_64 := dalvikvm64
-include art/build/Android.libcxx.mk
+include external/libcxx/libcxx.mk
 include $(BUILD_EXECUTABLE)
 
 # create symlink for the primary version target.
@@ -51,7 +51,6 @@
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_IS_HOST_MODULE := true
-include art/build/Android.libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 endif
diff --git a/dalvikvm/dalvikvm.cc b/dalvikvm/dalvikvm.cc
index 3fa43dc..67794c8 100644
--- a/dalvikvm/dalvikvm.cc
+++ b/dalvikvm/dalvikvm.cc
@@ -18,14 +18,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
 #include <algorithm>
+#include <memory>
 
 #include "jni.h"
 #include "JniInvocation.h"
 #include "ScopedLocalRef.h"
 #include "toStringArray.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -118,7 +117,7 @@
   // We're over-allocating, because this includes the options to the runtime
   // plus the options to the program.
   int option_count = argc;
-  UniquePtr<JavaVMOption[]> options(new JavaVMOption[option_count]());
+  std::unique_ptr<JavaVMOption[]> options(new JavaVMOption[option_count]());
 
   // Copy options over.  Everything up to the name of the class starts
   // with a '-' (the function hook stuff is strictly internal).
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 874c324..f0b5750 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -33,7 +33,7 @@
 #include "compiler.h"
 #include "compiler_callbacks.h"
 #include "dex_file-inl.h"
-#include "dex/pass_driver.h"
+#include "dex/pass_driver_me.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_callbacks_impl.h"
 #include "driver/compiler_driver.h"
@@ -228,7 +228,7 @@
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
     CHECK(verification_results != nullptr);
     CHECK(method_inliner_map != nullptr);
-    UniquePtr<Dex2Oat> dex2oat(new Dex2Oat(&compiler_options,
+    std::unique_ptr<Dex2Oat> dex2oat(new Dex2Oat(&compiler_options,
                                            compiler_kind,
                                            instruction_set,
                                            instruction_set_features,
@@ -256,19 +256,19 @@
 
   // Reads the class names (java.lang.Object) and returns a set of descriptors (Ljava/lang/Object;)
   CompilerDriver::DescriptorSet* ReadImageClassesFromFile(const char* image_classes_filename) {
-    UniquePtr<std::ifstream> image_classes_file(new std::ifstream(image_classes_filename,
+    std::unique_ptr<std::ifstream> image_classes_file(new std::ifstream(image_classes_filename,
                                                                   std::ifstream::in));
     if (image_classes_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open image classes file " << image_classes_filename;
       return nullptr;
     }
-    UniquePtr<CompilerDriver::DescriptorSet> result(ReadImageClasses(*image_classes_file.get()));
+    std::unique_ptr<CompilerDriver::DescriptorSet> result(ReadImageClasses(*image_classes_file.get()));
     image_classes_file->close();
     return result.release();
   }
 
   CompilerDriver::DescriptorSet* ReadImageClasses(std::istream& image_classes_stream) {
-    UniquePtr<CompilerDriver::DescriptorSet> image_classes(new CompilerDriver::DescriptorSet);
+    std::unique_ptr<CompilerDriver::DescriptorSet> image_classes(new CompilerDriver::DescriptorSet);
     while (image_classes_stream.good()) {
       std::string dot;
       std::getline(image_classes_stream, dot);
@@ -285,17 +285,17 @@
   CompilerDriver::DescriptorSet* ReadImageClassesFromZip(const char* zip_filename,
                                                          const char* image_classes_filename,
                                                          std::string* error_msg) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(zip_filename, error_msg));
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(zip_filename, error_msg));
     if (zip_archive.get() == nullptr) {
       return nullptr;
     }
-    UniquePtr<ZipEntry> zip_entry(zip_archive->Find(image_classes_filename, error_msg));
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(image_classes_filename, error_msg));
     if (zip_entry.get() == nullptr) {
       *error_msg = StringPrintf("Failed to find '%s' within '%s': %s", image_classes_filename,
                                 zip_filename, error_msg->c_str());
       return nullptr;
     }
-    UniquePtr<MemMap> image_classes_file(zip_entry->ExtractToMemMap(image_classes_filename,
+    std::unique_ptr<MemMap> image_classes_file(zip_entry->ExtractToMemMap(image_classes_filename,
                                                                     error_msg));
     if (image_classes_file.get() == nullptr) {
       *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", image_classes_filename,
@@ -315,7 +315,7 @@
                                       File* oat_file,
                                       const std::string& bitcode_filename,
                                       bool image,
-                                      UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
+                                      std::unique_ptr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
                                       bool dump_passes,
                                       TimingLogger& timings,
@@ -339,7 +339,7 @@
       Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files);
     }
 
-    UniquePtr<CompilerDriver> driver(new CompilerDriver(compiler_options_,
+    std::unique_ptr<CompilerDriver> driver(new CompilerDriver(compiler_options_,
                                                         verification_results_,
                                                         method_inliner_map_,
                                                         compiler_kind_,
@@ -403,7 +403,7 @@
       oat_data_begin = image_writer.GetOatDataBegin();
     }
 
-    UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
+    std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
     if (oat_file.get() == nullptr) {
       PLOG(ERROR) << "Failed to open ELF file: " << oat_filename;
       return false;
@@ -918,10 +918,10 @@
     } else if (option == "--no-profile-file") {
       // No profile
     } else if (option == "--print-pass-names") {
-      PassDriver::PrintPassNames();
+      PassDriverME::PrintPassNames();
     } else if (option.starts_with("--disable-passes=")) {
       std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
-      PassDriver::CreateDefaultPassList(disable_passes);
+      PassDriverME::CreateDefaultPassList(disable_passes);
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1064,7 +1064,7 @@
   WatchDog watch_dog(watch_dog_enabled);
 
   // Check early that the result of compilation can be written
-  UniquePtr<File> oat_file;
+  std::unique_ptr<File> oat_file;
   bool create_file = !oat_unstripped.empty();  // as opposed to using open file descriptor
   if (create_file) {
     oat_file.reset(OS::CreateEmptyFile(oat_unstripped.c_str()));
@@ -1124,7 +1124,7 @@
     LOG(ERROR) << "Failed to create dex2oat";
     return EXIT_FAILURE;
   }
-  UniquePtr<Dex2Oat> dex2oat(p_dex2oat);
+  std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
@@ -1136,7 +1136,7 @@
   WellKnownClasses::Init(self->GetJniEnv());
 
   // If --image-classes was specified, calculate the full list of classes to include in the image
-  UniquePtr<CompilerDriver::DescriptorSet> image_classes(nullptr);
+  std::unique_ptr<CompilerDriver::DescriptorSet> image_classes(nullptr);
   if (image_classes_filename != nullptr) {
     std::string error_msg;
     if (image_classes_zip_filename != nullptr) {
@@ -1162,7 +1162,7 @@
     if (dex_filenames.empty()) {
       ATRACE_BEGIN("Opening zip archive from file descriptor");
       std::string error_msg;
-      UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd, zip_location.c_str(),
+      std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd, zip_location.c_str(),
                                                                &error_msg));
       if (zip_archive.get() == nullptr) {
         LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location << "': "
@@ -1190,7 +1190,7 @@
       for (size_t i = 0; i < dex_files.size(); ++i) {
         const DexFile* dex_file = dex_files[i];
         std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex", getpid(), i));
-        UniquePtr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
+        std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
         if (tmp_file.get() == nullptr) {
             PLOG(ERROR) << "Failed to open file " << tmp_file_name
                         << ". Try: adb shell chmod 777 /data/local/tmp";
@@ -1225,7 +1225,7 @@
     }
   }
 
-  UniquePtr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option,
+  std::unique_ptr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option,
                                                                   android_root,
                                                                   is_host,
                                                                   dex_files,
@@ -1324,10 +1324,10 @@
   if (oat_unstripped != oat_stripped) {
     timings.NewSplit("dex2oat OatFile copy");
     oat_file.reset();
-     UniquePtr<File> in(OS::OpenFileForReading(oat_unstripped.c_str()));
-    UniquePtr<File> out(OS::CreateEmptyFile(oat_stripped.c_str()));
+    std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped.c_str()));
+    std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped.c_str()));
     size_t buffer_size = 8192;
-    UniquePtr<uint8_t> buffer(new uint8_t[buffer_size]);
+    std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
     while (true) {
       int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
       if (bytes_read <= 0) {
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 17828fd..dd4e9d5 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -46,7 +46,6 @@
   ifeq ($$(art_target_or_host),host)
      LOCAL_IS_HOST_MODULE := true
   endif
-  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-disassembler
@@ -89,6 +88,7 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   ifeq ($$(art_target_or_host),target)
+    include external/libcxx/libcxx.mk
     LOCAL_SHARED_LIBRARIES += libcutils libvixl
     include $(BUILD_SHARED_LIBRARY)
   else # host
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index aaf9ed5..dcae502 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -207,7 +207,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      UniquePtr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -235,7 +235,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != NULL);
       std::string error_msg;
-      UniquePtr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -289,7 +289,7 @@
     // Create the verifier early.
 
     std::string error_msg;
-    UniquePtr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
+    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
     if (dex_file.get() == NULL) {
       os << "NOT FOUND: " << error_msg << "\n\n";
       return;
@@ -732,7 +732,7 @@
   bool dump_raw_mapping_table_;
   bool dump_raw_gc_map_;
   std::set<uintptr_t> offsets_;
-  UniquePtr<Disassembler> disassembler_;
+  std::unique_ptr<Disassembler> disassembler_;
 };
 
 class ImageDumper {
@@ -871,7 +871,7 @@
       os_ = saved_os;
     }
     os << "STATS:\n" << std::flush;
-    UniquePtr<File> file(OS::OpenFileForReading(image_filename.c_str()));
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_filename.c_str()));
     if (file.get() == NULL) {
       LOG(WARNING) << "Failed to find image in " << image_filename;
     }
@@ -1190,7 +1190,7 @@
     std::vector<mirror::ArtMethod*> method_outlier;
     std::vector<size_t> method_outlier_size;
     std::vector<double> method_outlier_expansion;
-    std::vector<std::pair<std::string, size_t> > oat_dex_file_sizes;
+    std::vector<std::pair<std::string, size_t>> oat_dex_file_sizes;
 
     explicit Stats()
         : oat_file_bytes(0),
@@ -1438,7 +1438,7 @@
     // threshold, we assume 2 bytes per instruction and 2 instructions per block.
     kLargeMethodDexBytes = 16000
   };
-  UniquePtr<OatDumper> oat_dumper_;
+  std::unique_ptr<OatDumper> oat_dumper_;
   std::ostream* os_;
   gc::space::ImageSpace& image_space_;
   const ImageHeader& image_header_;
@@ -1465,7 +1465,7 @@
   const char* boot_image_filename = NULL;
   std::string elf_filename_prefix;
   std::ostream* os = &std::cout;
-  UniquePtr<std::ofstream> out;
+  std::unique_ptr<std::ofstream> out;
   bool dump_raw_mapping_table = false;
   bool dump_raw_gc_map = false;
 
@@ -1548,7 +1548,7 @@
     fprintf(stderr, "Failed to create runtime\n");
     return EXIT_FAILURE;
   }
-  UniquePtr<Runtime> runtime(Runtime::Current());
+  std::unique_ptr<Runtime> runtime(Runtime::Current());
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 4a23263..1521caa 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -297,6 +297,7 @@
 	lock_word.h \
 	mirror/class.h \
 	oat.h \
+	object_callbacks.h \
 	quick/inline_method_analyser.h \
 	thread.h \
 	thread_state.h \
@@ -353,8 +354,6 @@
     LOCAL_IS_HOST_MODULE := true
   endif
 
-  include art/build/Android.libcxx.mk
-
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
@@ -398,7 +397,12 @@
   endif
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES)
   LOCAL_SHARED_LIBRARIES += liblog libnativehelper
-  LOCAL_SHARED_LIBRARIES += libbacktrace # native stack trace support
+  ifeq ($$(art_target_or_host),target)
+    include external/libcxx/libcxx.mk
+    LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
+  else
+    LOCAL_SHARED_LIBRARIES += libbacktrace
+  endif
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils
     LOCAL_STATIC_LIBRARIES := libziparchive libz
diff --git a/runtime/UniquePtrCompat.h b/runtime/UniquePtrCompat.h
deleted file mode 100644
index 4a45616..0000000
--- a/runtime/UniquePtrCompat.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_UNIQUEPTRCOMPAT_H_
-#define ART_RUNTIME_UNIQUEPTRCOMPAT_H_
-
-// Stlport doesn't declare std::unique_ptr. UniquePtr.h declares an incompatible std::swap
-// prototype with libc++. This compatibility header file resolves differences between the two, in
-// the future UniquePtr will become std::unique_ptr.
-
-#ifdef ART_WITH_STLPORT
-
-#include "UniquePtr.h"
-
-#else   //  ART_WITH_STLPORT
-
-#include <memory>
-
-template <typename T>
-using UniquePtr = typename std::unique_ptr<T>;
-
-#endif  //  ART_WITH_STLPORT
-
-#endif  // ART_RUNTIME_UNIQUEPTRCOMPAT_H_
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index fae44af..09e8b59 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -99,14 +99,33 @@
   gprs_[X14] = NULL;
   gprs_[X15] = NULL;
 
-  fprs_[D8] = NULL;
-  fprs_[D9] = NULL;
-  fprs_[D10] = NULL;
-  fprs_[D11] = NULL;
-  fprs_[D12] = NULL;
-  fprs_[D13] = NULL;
-  fprs_[D14] = NULL;
-  fprs_[D15] = NULL;
+  // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
+
+  fprs_[D0] = NULL;
+  fprs_[D1] = NULL;
+  fprs_[D2] = NULL;
+  fprs_[D3] = NULL;
+  fprs_[D4] = NULL;
+  fprs_[D5] = NULL;
+  fprs_[D6] = NULL;
+  fprs_[D7] = NULL;
+
+  fprs_[D16] = NULL;
+  fprs_[D17] = NULL;
+  fprs_[D18] = NULL;
+  fprs_[D19] = NULL;
+  fprs_[D20] = NULL;
+  fprs_[D21] = NULL;
+  fprs_[D22] = NULL;
+  fprs_[D23] = NULL;
+  fprs_[D24] = NULL;
+  fprs_[D25] = NULL;
+  fprs_[D26] = NULL;
+  fprs_[D27] = NULL;
+  fprs_[D28] = NULL;
+  fprs_[D29] = NULL;
+  fprs_[D30] = NULL;
+  fprs_[D31] = NULL;
 }
 
 extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index f2050b3..ac922dd 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -42,7 +42,7 @@
 #endif
 
     // FP args
-    stp d1, d2,   [sp, #8]
+    stp d0, d1, [sp, #8]
     stp d2, d3, [sp, #24]
     stp d4, d5, [sp, #40]
     stp d6, d7, [sp, #56]
@@ -508,26 +508,42 @@
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
     /*
-     * TODO arm64 specifics need to be fleshed out.
      * All generated callsites for interface invokes and invocation slow paths will load arguments
-     * as usual - except instead of loading x0 with the target Method*, x0 will contain
-     * the method_idx.  This wrapper will save x1-x3, load the caller's Method*, align the
+     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
+     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
      * stack and call the appropriate C helper.
-     * NOTE: "this" is first visible argument of the target, and so can be found in x1.
+     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
      *
-     * The helper will attempt to locate the target and return a result in x0 consisting
+     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
      * of the target Method* in x0 and method->code_ in x1.
      *
-     * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the
+     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
      * thread and we branch to another stub to deliver it.
      *
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
      * pointing back to the original caller.
+     *
+     * Adapted from ARM32 code.
+     *
+     * Clobbers x12.
      */
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    brk 0
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    // Helper signature is always
+    // (method_idx, *this_object, *caller_method, *self, sp)
+
+    ldr    x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
+    mov    x3, xSELF                      // pass Thread::Current
+    mov    x4, sp
+    bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
+    mov    x12, x1                        // save Method*->code_
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
+    br     x12                            // tail call to target
+1:
+    DELIVER_PENDING_EXCEPTION
 END \c_name
 .endm
 
@@ -1381,8 +1397,17 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
-UNIMPLEMENTED art_quick_imt_conflict_trampoline
-
+    /*
+     * Called to resolve an imt conflict. x12 is a hidden argument that holds the target method's
+     * dex method index.
+     */
+ENTRY art_quick_imt_conflict_trampoline
+    ldr    x0, [sp, #0]                                // load caller Method*
+    ldr    w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
+    add    x0, x0, #OBJECT_ARRAY_DATA_OFFSET           // get starting address of data
+    ldr    w0, [x0, x12, lsl #2]                       // load the target method
+    b art_quick_invoke_interface_trampoline
+END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
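
For readers who don't speak A64, here is a hedged C-level sketch of the control flow the new INVOKE_TRAMPOLINE body implements. TwoWordReturn, FindMethodHelper, DeliverPendingException and TailCall are illustrative names, not ART symbols; only the flow mirrors the macro.

// Illustrative names throughout; only the control flow matches the macro body.
struct TwoWordReturn {
  mirror::ArtMethod* method;  // Returned in x0: resolved target, nullptr on failure.
  const void* code;           // Returned in x1: method->code_, undefined when method is nullptr.
};

void InvokeTrampolineSketch(uint32_t method_idx, mirror::Object* this_object,
                            mirror::ArtMethod* caller, Thread* self, void* sp) {
  // SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME has already spilled the callee saves.
  TwoWordReturn target = FindMethodHelper(method_idx, this_object, caller, self, sp);
  if (target.method == nullptr) {
    DeliverPendingException(self);  // The helper left an exception on the thread.
  } else {
    TailCall(target.code);  // br x12: jump, so lr still points at the original caller.
  }
}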
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index d9bc105..fac9883 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -16,6 +16,8 @@
 
 #include "common_runtime_test.h"
 #include "mirror/art_field-inl.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
 
 #include <cstdio>
@@ -50,6 +52,7 @@
         pair.first = "-Xmx4M";  // Smallest we can go.
       }
     }
+    options->push_back(std::make_pair("-Xint", nullptr));
   }
 
   // Helper function needed since TEST_F makes a new class.
@@ -59,116 +62,7 @@
 
  public:
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
-    // Push a transition back into managed code onto the linked list in thread.
-    ManagedStack fragment;
-    self->PushManagedStackFragment(&fragment);
-
-    size_t result;
-#if defined(__i386__)
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "pushl $0\n\t"               // Push nullptr to terminate quick stack
-        "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"               // Pop nullptr
-        : "=a" (result)
-          // Use the result from eax
-        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code)
-          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-        : );  // clobber.
-    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
-    //       but compilation fails when declaring that.
-#elif defined(__arm__)
-    __asm__ __volatile__(
-        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
-        ".cfi_adjust_cfa_offset 52\n\t"
-        "push {r9}\n\t"
-        ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, #0\n\n"
-        "str r9, [sp, #-8]!\n\t"   // Push nullptr to terminate stack, +8B padding so 16B aligned
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "ldr r9, [sp, #8]\n\t"
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #20\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #4]\n\t"
-        "str %[arg2], [sp, #8]\n\t"
-        "str %[code], [sp, #12]\n\t"
-        "str %[self], [sp, #16]\n\t"
-        "ldr r0, [sp]\n\t"
-        "ldr r1, [sp, #4]\n\t"
-        "ldr r2, [sp, #8]\n\t"
-        "ldr r3, [sp, #12]\n\t"
-        "ldr r9, [sp, #16]\n\t"
-        "add sp, sp, #20\n\t"
-
-        "blx r3\n\t"                // Call the stub
-        "add sp, sp, #12\n\t"       // Pop nullptr and padding
-        ".cfi_adjust_cfa_offset -12\n\t"
-        "pop {r1-r12, lr}\n\t"      // Restore state
-        ".cfi_adjust_cfa_offset -52\n\t"
-        "mov %[result], r0\n\t"     // Save the result
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : );  // clobber.
-#elif defined(__aarch64__)
-    __asm__ __volatile__(
-        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
-        ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1,  [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
-        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #48\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #8]\n\t"
-        "str %[arg2], [sp, #16]\n\t"
-        "str %[code], [sp, #24]\n\t"
-        "str %[self], [sp, #32]\n\t"
-        "ldr x0, [sp]\n\t"
-        "ldr x1, [sp, #8]\n\t"
-        "ldr x2, [sp, #16]\n\t"
-        "ldr x3, [sp, #24]\n\t"
-        "ldr x18, [sp, #32]\n\t"
-        "add sp, sp, #48\n\t"
-
-        "blr x3\n\t"              // Call the stub
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"      // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
-        ".cfi_adjust_cfa_offset -48\n\t"
-
-        "mov %[result], x0\n\t"        // Save the result
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
-#elif defined(__x86_64__)
-    // Note: Uses the native convention
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "pushq $0\n\t"                 // Push nullptr to terminate quick stack
-        "pushq $0\n\t"                 // 16B alignment padding
-        ".cfi_adjust_cfa_offset 16\n\t"
-        "call *%%rax\n\t"              // Call the stub
-        "addq $16, %%rsp\n\t"              // Pop nullptr and padding
-        ".cfi_adjust_cfa_offset -16\n\t"
-        : "=a" (result)
-          // Use the result from rax
-        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
-          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
-    // TODO: Should we clobber the other registers?
-#else
-    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
-    result = 0;
-#endif
-    // Pop transition.
-    self->PopManagedStackFragment(fragment);
-    return result;
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, nullptr);
   }
 
   // TODO: Set up a frame according to referrer's specs.
@@ -179,6 +73,7 @@
     self->PushManagedStackFragment(&fragment);
 
     size_t result;
+    size_t fpr_result = 0;
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
@@ -230,6 +125,14 @@
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
+        // Spill space for d8 - d15
+        "sub sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset 64\n\t"
+        "stp d8, d9,   [sp]\n\t"
+        "stp d10, d11, [sp, #16]\n\t"
+        "stp d12, d13, [sp, #32]\n\t"
+        "stp d14, d15, [sp, #48]\n\t"
+
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
         "stp %[referrer], x1, [sp]\n\t"// referrer, x1
@@ -238,27 +141,118 @@
 
         // Push everything on the stack, so we don't rely on the order. What a mess. :-(
         "sub sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset 48\n\t"
         "str %[arg0], [sp]\n\t"
         "str %[arg1], [sp, #8]\n\t"
         "str %[arg2], [sp, #16]\n\t"
         "str %[code], [sp, #24]\n\t"
         "str %[self], [sp, #32]\n\t"
+
+        // Now we definitely have x0-x3 free, use them to garble d8 - d15
+        "movk x0, #0xfad0\n\t"
+        "movk x0, #0xebad, lsl #16\n\t"
+        "movk x0, #0xfad0, lsl #32\n\t"
+        "movk x0, #0xebad, lsl #48\n\t"
+        "fmov d8, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d9, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d10, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d11, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d12, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d13, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d14, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d15, x0\n\t"
+
+        // Load call params
         "ldr x0, [sp]\n\t"
         "ldr x1, [sp, #8]\n\t"
         "ldr x2, [sp, #16]\n\t"
         "ldr x3, [sp, #24]\n\t"
         "ldr x18, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
-
-        "blr x3\n\t"              // Call the stub
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"      // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
 
+
+        "blr x3\n\t"              // Call the stub
+
+        // Test d8 - d15. We can use x1 and x2.
+        "movk x1, #0xfad0\n\t"
+        "movk x1, #0xebad, lsl #16\n\t"
+        "movk x1, #0xfad0, lsl #32\n\t"
+        "movk x1, #0xebad, lsl #48\n\t"
+        "fmov x2, d8\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d9\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d10\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d11\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d12\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d13\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d14\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d15\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+
+        "mov %[fpr_result], #0\n\t"
+
+        // Finish up.
+        "2:\n\t"
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"       // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
         "mov %[result], x0\n\t"        // Save the result
-        : [result] "=r" (result)
+
+        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
+        "ldp d10, d11, [sp, #16]\n\t"
+        "ldp d12, d13, [sp, #32]\n\t"
+        "ldp d14, d15, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset -64\n\t"
+
+        "b 3f\n\t"                     // Goto end
+
+        // Failed fpr verification.
+        "1:\n\t"
+        "mov %[fpr_result], #1\n\t"
+        "b 2b\n\t"                     // Goto finish-up
+
+        // End
+        "3:\n\t"
+        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer)
@@ -285,6 +279,238 @@
 #endif
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
+    return result;
+  }
+
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrerAndHidden(size_t arg0, size_t arg1, size_t arg2, uintptr_t code,
+                                      Thread* self, mirror::ArtMethod* referrer, size_t hidden) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+    size_t fpr_result = 0;
+#if defined(__i386__)
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "movd %[hidden], %%xmm0\n\t"
+        "pushl %[referrer]\n\t"     // Store referrer
+        "call *%%edi\n\t"           // Call the stub
+        "addl $4, %%esp"            // Pop referrer
+        : "=a" (result)
+          // Use the result from eax
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
+    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
+    //       but compilation fails when declaring that.
+#elif defined(__arm__)
+    __asm__ __volatile__(
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "push {r9}\n\t"
+        ".cfi_adjust_cfa_offset 4\n\t"
+        "mov r9, %[referrer]\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #24\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "str %[hidden], [sp, #20]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "ldr r12, [sp, #20]\n\t"
+        "add sp, sp, #24\n\t"
+
+        "blx r3\n\t"                // Call the stub
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
+        "mov %[result], r0\n\t"     // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+          : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+            [referrer] "r"(referrer), [hidden] "r"(hidden)
+            : );  // clobber.
+#elif defined(__aarch64__)
+    __asm__ __volatile__(
+        // Spill space for d8 - d15
+        "sub sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset 64\n\t"
+        "stp d8, d9,   [sp]\n\t"
+        "stp d10, d11, [sp, #16]\n\t"
+        "stp d12, d13, [sp, #32]\n\t"
+        "stp d14, d15, [sp, #48]\n\t"
+
+        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "str %[hidden], [sp, #40]\n\t"
+
+        // Now we definitely have x0-x3 free, use them to garble d8 - d15
+        "movk x0, #0xfad0\n\t"
+        "movk x0, #0xebad, lsl #16\n\t"
+        "movk x0, #0xfad0, lsl #32\n\t"
+        "movk x0, #0xebad, lsl #48\n\t"
+        "fmov d8, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d9, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d10, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d11, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d12, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d13, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d14, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d15, x0\n\t"
+
+        // Load call params
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "ldr x12, [sp, #40]\n\t"
+        "add sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+
+        "blr x3\n\t"              // Call the stub
+
+        // Test d8 - d15. We can use x1 and x2.
+        "movk x1, #0xfad0\n\t"
+        "movk x1, #0xebad, lsl #16\n\t"
+        "movk x1, #0xfad0, lsl #32\n\t"
+        "movk x1, #0xebad, lsl #48\n\t"
+        "fmov x2, d8\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d9\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d10\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d11\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d12\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d13\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d14\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d15\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+
+        "mov %[fpr_result], #0\n\t"
+
+        // Finish up.
+        "2:\n\t"
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"       // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
+        "mov %[result], x0\n\t"        // Save the result
+
+        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
+        "ldp d10, d11, [sp, #16]\n\t"
+        "ldp d12, d13, [sp, #32]\n\t"
+        "ldp d14, d15, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset -64\n\t"
+
+        "b 3f\n\t"                     // Goto end
+
+        // Failed fpr verification.
+        "1:\n\t"
+        "mov %[fpr_result], #1\n\t"
+        "b 2b\n\t"                     // Goto finish-up
+
+        // End
+        "3:\n\t"
+        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
+        // Use the result from r0
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+#elif defined(__x86_64__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "movq %[hidden], %%r9\n\t"     // No need to save r9, listed as clobbered
+        "movd %%r9, %%xmm0\n\t"
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"            // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+        // Use the result from rax
+        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden)
+        // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
     return result;
   }
 
@@ -314,6 +540,9 @@
     return 0;
 #endif
   }
+
+ protected:
+  size_t fp_result;
 };
 
 
@@ -843,7 +1072,7 @@
 
     // Use arbitrary large amount for now.
     static const size_t kMaxHandles = 1000000;
-    UniquePtr<StackHandleScope<kMaxHandles> > hsp(new StackHandleScope<kMaxHandles>(self));
+    std::unique_ptr<StackHandleScope<kMaxHandles>> hsp(new StackHandleScope<kMaxHandles>(self));
 
     std::vector<Handle<mirror::Object>> handles;
     // Start allocating with 128K
@@ -1450,4 +1679,116 @@
   TestFields(self, this, Primitive::Type::kPrimLong);
 }
 
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_imt_conflict_trampoline(void);
+#endif
+
+TEST_F(StubTest, IMT) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  ScopedObjectAccess soa(self);
+  StackHandleScope<7> hs(self);
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+
+  // ArrayList
+
+  // Load ArrayList and used methods (JNI).
+  jclass arraylist_jclass = env->FindClass("java/util/ArrayList");
+  ASSERT_NE(nullptr, arraylist_jclass);
+  jmethodID arraylist_constructor = env->GetMethodID(arraylist_jclass, "<init>", "()V");
+  ASSERT_NE(nullptr, arraylist_constructor);
+  jmethodID contains_jmethod = env->GetMethodID(arraylist_jclass, "contains", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, contains_jmethod);
+  jmethodID add_jmethod = env->GetMethodID(arraylist_jclass, "add", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, add_jmethod);
+
+  // Get mirror representation.
+  Handle<mirror::ArtMethod> contains_amethod(hs.NewHandle(soa.DecodeMethod(contains_jmethod)));
+
+  // Patch up ArrayList.contains.
+  if (contains_amethod.Get()->GetEntryPointFromQuickCompiledCode() == nullptr) {
+    contains_amethod.Get()->SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(
+        GetTlsPtr(self)->quick_entrypoints.pQuickToInterpreterBridge));
+  }
+
+  // List
+
+  // Load List and used methods (JNI).
+  jclass list_jclass = env->FindClass("java/util/List");
+  ASSERT_NE(nullptr, list_jclass);
+  jmethodID inf_contains_jmethod = env->GetMethodID(list_jclass, "contains", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, inf_contains_jmethod);
+
+  // Get mirror representation.
+  Handle<mirror::ArtMethod> inf_contains(hs.NewHandle(soa.DecodeMethod(inf_contains_jmethod)));
+
+  // Object
+
+  jclass obj_jclass = env->FindClass("java/lang/Object");
+  ASSERT_NE(nullptr, obj_jclass);
+  jmethodID obj_constructor = env->GetMethodID(obj_jclass, "<init>", "()V");
+  ASSERT_NE(nullptr, obj_constructor);
+
+  // Sanity check: check that there is a conflict for List.contains in ArrayList.
+
+  mirror::Class* arraylist_class = soa.Decode<mirror::Class*>(arraylist_jclass);
+  mirror::ArtMethod* m = arraylist_class->GetImTable()->Get(
+      inf_contains->GetDexMethodIndex() % ClassLinker::kImtSize);
+
+  if (!m->IsImtConflictMethod()) {
+    LOG(WARNING) << "Test is meaningless, no IMT conflict in setup: " <<
+        PrettyMethod(m, true);
+    LOG(WARNING) << "Please update StubTest.IMT.";
+    return;
+  }
+
+  // Create instances.
+
+  jobject jarray_list = env->NewObject(arraylist_jclass, arraylist_constructor);
+  ASSERT_NE(nullptr, jarray_list);
+  Handle<mirror::Object> array_list(hs.NewHandle(soa.Decode<mirror::Object*>(jarray_list)));
+
+  jobject jobj = env->NewObject(obj_jclass, obj_constructor);
+  ASSERT_NE(nullptr, jobj);
+  Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(jobj)));
+
+  // Invoke.
+
+  size_t result =
+      Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+                                   reinterpret_cast<size_t>(obj.Get()),
+                                   reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline),
+                                   self, contains_amethod.Get(),
+                                   static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex()));
+
+  ASSERT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(static_cast<size_t>(JNI_FALSE), result);
+
+  // Add object.
+
+  env->CallBooleanMethod(jarray_list, add_jmethod, jobj);
+
+  ASSERT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
+
+  // Invoke again.
+
+  result = Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+                                        reinterpret_cast<size_t>(obj.Get()),
+                                        reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline),
+                                        self, contains_amethod.Get(),
+                                        static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex()));
+
+  ASSERT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
+#else
+  LOG(INFO) << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
 }  // namespace art
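
Both aarch64 harness blocks above seed the callee-saved d8-d15 with the pattern 0xebadfad0ebadfad0 (incremented once per register) before the call and compare afterwards, setting fpr_result accordingly. A minimal C++ rendering of that check — the harness itself does this in inline assembly:

#include <cstdint>

// The 64-bit pattern the movk sequence above builds up 16 bits at a time.
constexpr uint64_t kFprMarker = 0xebadfad0ebadfad0ULL;

// Returns true iff the callee preserved d8-d15 (fpr_result == 0 in the asm).
bool CalleeSavedFprsSurvived(const uint64_t fprs_after_call[8]) {
  for (uint64_t i = 0; i < 8; ++i) {
    if (fprs_after_call[i] != kFprMarker + i) {
      return false;  // Corresponds to the branch to label 1 / fpr_result = 1.
    }
  }
  return true;
}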
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 909bd3e..f1d0746 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -19,7 +19,7 @@
 
 #include "asm_support_x86.h"
 
-#if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5))
     // Clang's as(1) doesn't let you name macro parameters prior to 3.5.
     #define MACRO0(macro_name) .macro macro_name
     #define MACRO1(macro_name, macro_arg1) .macro macro_name
@@ -32,8 +32,6 @@
     #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
-    #define FUNCTION_TYPE(name,index) .type $index, @function
-    #define SIZE(name,index) .size $index, .-$index
 
     //  The use of $x for arguments mean that literals need to be represented with $$x in macros.
     #define LITERAL(value) $value
@@ -56,13 +54,22 @@
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
     #define CALL_MACRO(name,index) name&
-    #define FUNCTION_TYPE(name,index) .type name&, @function
-    #define SIZE(name,index) .size name, .-name
 
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $value
 #endif
 
+#if defined(__APPLE__)
+    #define FUNCTION_TYPE(name,index)
+    #define SIZE(name,index)
+#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+    #define FUNCTION_TYPE(name,index) .type $index, @function
+    #define SIZE(name,index) .size $index, .-$index
+#else
+    #define FUNCTION_TYPE(name,index) .type name&, @function
+    #define SIZE(name,index) .size name, .-name
+#endif
+
     // CFI support.
 #if !defined(__APPLE__)
     #define CFI_STARTPROC .cfi_startproc
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 9c86c75..ed7f246 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -239,24 +239,45 @@
 
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
-     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
+     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
      * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
      * stack and call the appropriate C helper.
-     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
+     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
      *
-     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
-     * of the target Method* in r0 and method->code_ in r1.
+     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
+     * of the target Method* in rax and method->code_ in rdx.
      *
-     * If unsuccessful, the helper will return NULL/NULL. There will bea pending exception in the
+     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
      * thread and we branch to another stub to deliver it.
      *
-     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
-     * pointing back to the original caller.
+     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
+     * location on the stack.
+     *
+     * Adapted from x86 code.
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    int3
-    int3
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    // Helper signature is always
+    // (method_idx, *this_object, *caller_method, *self, sp)
+
+    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
+    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
+    movq %rsp, %r8                                         // pass SP
+
+    call PLT_VAR(cxx_name, 1)         // cxx_name(method_idx, this, caller Method*, Thread*, SP)
+    // Save the target Method* and move the code pointer into rax for the tail call below.
+    movq %rax, %rdi
+    movq %rdx, %rax
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    testq %rdi, %rdi
+    jz 1f
+
+    // Tail call to intended method.
+    jmp *%rax
+1:
+    DELIVER_PENDING_EXCEPTION
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
@@ -977,9 +998,18 @@
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict.
+     * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways).
+     *
+     * xmm0 is a hidden argument that holds the target method's dex method index.
+     * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
      */
-UNIMPLEMENTED art_quick_imt_conflict_trampoline
+DEFINE_FUNCTION art_quick_imt_conflict_trampoline
+    movq 16(%rsp), %rdi            // load caller Method*
+    movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
+    movd %xmm0, %rax               // get target method index stored in xmm0
+    movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
+    jmp art_quick_invoke_interface_trampoline_local
+END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
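
The arm64 and x86-64 versions of art_quick_imt_conflict_trampoline above perform the same two loads before falling through to the interface trampoline. A hedged C-level rendering, with illustrative accessor names; the offsets are the ones named in the assembly (METHOD_DEX_CACHE_METHODS_OFFSET, OBJECT_ARRAY_DATA_OFFSET):

// Sketch only: accessors are illustrative, not necessarily ART's exact API.
mirror::ArtMethod* ResolveImtConflict(mirror::ArtMethod* caller, uint32_t dex_method_index) {
  // The hidden argument (x12 on arm64, xmm0 on x86-64) carries dex_method_index.
  auto* resolved_methods = caller->GetDexCacheResolvedMethods();
  // Each entry is a 4-byte compressed reference, hence the *4 scaling in the asm.
  mirror::ArtMethod* target = resolved_methods->Get(dex_method_index);
  return target;  // Execution then continues in art_quick_invoke_interface_trampoline.
}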
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 6867fef..9262db6 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -17,7 +17,15 @@
 #ifndef ART_RUNTIME_ATOMIC_H_
 #define ART_RUNTIME_ATOMIC_H_
 
+#ifdef __clang__
+#define ART_HAVE_STDATOMIC 1
+#endif
+
 #include <stdint.h>
+#if ART_HAVE_STDATOMIC
+#include <atomic>
+#endif
+#include <limits>
 #include <vector>
 
 #include "base/logging.h"
@@ -27,6 +35,76 @@
 
 class Mutex;
 
+#if ART_HAVE_STDATOMIC
+template<typename T>
+class Atomic : public std::atomic<T> {
+ public:
+  COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>),
+                 std_atomic_size_differs_from_that_of_underlying_type);
+  COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>),
+                 std_atomic_alignment_differs_from_that_of_underlying_type);
+
+  Atomic<T>() : std::atomic<T>() { }
+
+  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const {
+    return this->load(std::memory_order_seq_cst);
+  }
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired) {
+    this->store(desired, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
+  volatile T* Address() {
+    return reinterpret_cast<T*>(this);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+};
+#else
 template<typename T>
 class Atomic {
  public:
@@ -34,24 +112,54 @@
 
   explicit Atomic<T>(T value) : value_(value) { }
 
-  Atomic<T>& operator=(T desired) {
-    Store(desired);
-    return *this;
-  }
-
-  T Load() const {
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
     return value_;
   }
 
-  operator T() const {
-    return Load();
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const;
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    value_ = desired;
   }
 
-  T FetchAndAdd(const T value) {
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired);
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    // TODO: make this relaxed.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    // TODO: make this acquire.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    // TODO: make this release.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
     return __sync_fetch_and_add(&value_, value);  // Return old_value.
   }
 
-  T FetchAndSub(const T value) {
+  T FetchAndSubSequentiallyConsistent(const T value) {
     return __sync_fetch_and_sub(&value_, value);  // Return old value.
   }
 
@@ -71,22 +179,14 @@
     return __sync_fetch_and_sub(&value_, 1);  // Return old value.
   }
 
-  bool CompareAndSwap(T expected_value, T desired_value) {
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile T* Address() {
-    return &value_;
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
   }
 
  private:
-  // Unsafe = operator for non atomic operations on the integer.
-  void Store(T desired) {
-    value_ = desired;
-  }
-
-  volatile T value_;
+  T value_;
 };
+#endif
 
 typedef Atomic<int32_t> AtomicInteger;
 
@@ -196,7 +296,7 @@
 
   // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
   static bool LongAtomicsUseMutexes() {
-    return !kNeedSwapMutexes;
+    return kNeedSwapMutexes;
   }
 
   static void MembarLoadStore() {
@@ -260,6 +360,23 @@
   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 };
 
+#if !ART_HAVE_STDATOMIC
+template<typename T>
+inline T Atomic<T>::LoadSequentiallyConsistent() const {
+  T result = value_;
+  QuasiAtomic::MembarLoadLoad();
+  return result;
+}
+
+template<typename T>
+inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
+  QuasiAtomic::MembarStoreStore();
+  value_ = desired;
+  QuasiAtomic::MembarStoreLoad();
+}
+
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ATOMIC_H_
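
A usage sketch for the renamed Atomic<T> operations above; it compiles against either branch of the #if, since both define the same surface. The CAS retry loop is the idiom the rest of this patch converts callers to — weak CAS may fail spuriously, and failure does not update the expected value, so the loop reloads:

#include <cstdint>

art::Atomic<int32_t> counter(0);

void AtomicUsageSketch() {
  counter.StoreRelaxed(1);                               // No ordering constraints.
  int32_t seen = counter.LoadSequentiallyConsistent();   // Totally-ordered load.
  while (!counter.CompareExchangeWeakRelaxed(seen, seen + 1)) {
    seen = counter.LoadRelaxed();                        // Reload and retry on failure.
  }
  int32_t old_value = counter.FetchAndAddSequentiallyConsistent(1);  // Returns the old value.
  (void)old_value;
}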
diff --git a/runtime/barrier.h b/runtime/barrier.h
index d3e6bae..a433cac 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_BARRIER_H_
 #define ART_RUNTIME_BARRIER_H_
 
+#include <memory>
 #include "base/mutex.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index a02c4c7..086ef44 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,7 +22,6 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 class CheckWaitTask : public Task {
@@ -78,20 +77,20 @@
   barrier.Increment(self, num_threads);
   // At this point each thread should have passed through the barrier. The first count should be
   // equal to num_threads.
-  EXPECT_EQ(num_threads, count1);
+  EXPECT_EQ(num_threads, count1.LoadRelaxed());
   // Count 3 should still be zero since no thread should have gone past the second barrier.
-  EXPECT_EQ(0, count3);
+  EXPECT_EQ(0, count3.LoadRelaxed());
   // Now lets tell the threads to pass again.
   barrier.Increment(self, num_threads);
   // Count 2 should be equal to num_threads since each thread must have passed the second barrier
   // at this point.
-  EXPECT_EQ(num_threads, count2);
+  EXPECT_EQ(num_threads, count2.LoadRelaxed());
   // Wait for all the threads to finish.
   thread_pool.Wait(self, true, false);
   // All three counts should be equal to num_threads now.
-  EXPECT_EQ(count1, count2);
-  EXPECT_EQ(count2, count3);
-  EXPECT_EQ(num_threads, count3);
+  EXPECT_EQ(count1.LoadRelaxed(), count2.LoadRelaxed());
+  EXPECT_EQ(count2.LoadRelaxed(), count3.LoadRelaxed());
+  EXPECT_EQ(num_threads, count3.LoadRelaxed());
 }
 
 class CheckPassTask : public Task {
@@ -134,7 +133,7 @@
   // Wait for all the tasks to complete using the barrier.
   barrier.Increment(self, expected_total_tasks);
   // The total number of completed tasks should be equal to expected_total_tasks.
-  EXPECT_EQ(count, expected_total_tasks);
+  EXPECT_EQ(count.LoadRelaxed(), expected_total_tasks);
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index 990d1db..0f866a4 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include "UniquePtrCompat.h"
+#include <memory>
+
 #include "bit_vector.h"
 #include "gtest/gtest.h"
 
@@ -40,7 +41,7 @@
   BitVector::Iterator empty_iterator(&bv);
   EXPECT_EQ(-1, empty_iterator.Next());
 
-  UniquePtr<BitVector::Iterator> empty_iterator_on_heap(bv.GetIterator());
+  std::unique_ptr<BitVector::Iterator> empty_iterator_on_heap(bv.GetIterator());
   EXPECT_EQ(-1, empty_iterator_on_heap->Next());
 
   bv.SetBit(0);
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index d72ae47..454f2ab 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
+#include <memory>
+#include <sstream>
+
 #include "gtest/gtest.h"
 #include "histogram-inl.h"
-#include "UniquePtrCompat.h"
-
-#include <sstream>
 
 namespace art {
 
@@ -34,7 +34,7 @@
 //   PerValue = hist->PercentileVal(0.50); finds the 50th percentile(median).
 
 TEST(Histtest, MeanTest) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("MeanTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("MeanTest", 5));
 
   double mean;
   for (size_t Idx = 0; Idx < 90; Idx++) {
@@ -52,7 +52,7 @@
 }
 
 TEST(Histtest, VarianceTest) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("VarianceTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("VarianceTest", 5));
 
   double variance;
   hist->AddValue(9);
@@ -64,7 +64,7 @@
 }
 
 TEST(Histtest, Percentile) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Percentile", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("Percentile", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -91,7 +91,7 @@
 }
 
 TEST(Histtest, UpdateRange) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("UpdateRange", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("UpdateRange", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -131,7 +131,7 @@
 }
 
 TEST(Histtest, Reset) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Reset", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("Reset", 5));
 
   double PerValue;
   hist->AddValue(0);
@@ -174,7 +174,7 @@
 }
 
 TEST(Histtest, MultipleCreateHist) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("MultipleCreateHist", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("MultipleCreateHist", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -213,7 +213,7 @@
 }
 
 TEST(Histtest, SingleValue) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("SingleValue", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("SingleValue", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   hist->AddValue(1);
@@ -225,7 +225,7 @@
 }
 
 TEST(Histtest, CappingPercentiles) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("CappingPercentiles", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("CappingPercentiles", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double per_995;
@@ -251,7 +251,7 @@
 }
 
 TEST(Histtest, SpikyValues) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("SpikyValues", 5, 4096));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("SpikyValues", 5, 4096));
   Histogram<uint64_t>::CumulativeData data;
 
   for (uint64_t idx = 0ull; idx < 30ull; idx++) {
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index b6c6b9b..b2ad1d0 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -19,7 +19,6 @@
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 
 namespace art {
@@ -31,9 +30,9 @@
 unsigned int gAborting = 0;
 
 static LogSeverity gMinimumLogSeverity = INFO;
-static UniquePtr<std::string> gCmdLine;
-static UniquePtr<std::string> gProgramInvocationName;
-static UniquePtr<std::string> gProgramInvocationShortName;
+static std::unique_ptr<std::string> gCmdLine;
+static std::unique_ptr<std::string> gProgramInvocationName;
+static std::unique_ptr<std::string> gProgramInvocationShortName;
 
 const char* GetCmdLine() {
   return (gCmdLine.get() != nullptr) ? gCmdLine->c_str() : nullptr;
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 7800cfe..814195c 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -20,12 +20,13 @@
 #include <cerrno>
 #include <cstring>
 #include <iostream>  // NOLINT
+#include <memory>
 #include <sstream>
 #include <signal.h>
 #include <vector>
+
 #include "base/macros.h"
 #include "log_severity.h"
-#include "UniquePtrCompat.h"
 
 #define CHECK(x) \
   if (UNLIKELY(!(x))) \
@@ -202,7 +203,7 @@
  private:
   static void LogLine(const LogMessageData& data, const char*);
 
-  const UniquePtr<LogMessageData> data_;
+  const std::unique_ptr<LogMessageData> data_;
 
   friend void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context);
   friend class Mutex;
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 8175514..47571f8 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -169,7 +169,7 @@
 // bionic and glibc both have TEMP_FAILURE_RETRY, but Mac OS' libc doesn't.
 #ifndef TEMP_FAILURE_RETRY
 #define TEMP_FAILURE_RETRY(exp) ({ \
-  typeof(exp) _rc; \
+  decltype(exp) _rc; \
   do { \
     _rc = (exp); \
   } while (_rc == -1 && errno == EINTR); \
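
The typeof -> decltype switch matters because the macro declares a local of the wrapped expression's type, and decltype is the standard C++11 spelling where the GNU typeof extension may be unavailable. A usage sketch — read(2) returns ssize_t, so _rc is deduced as ssize_t:

#include <unistd.h>  // read(); TEMP_FAILURE_RETRY comes from base/macros.h (or libc on bionic/glibc).

// Retries the read while it fails with EINTR, per the macro's loop.
ssize_t ReadRetryingOnEintr(int fd, void* buf, size_t count) {
  return TEMP_FAILURE_RETRY(read(fd, buf, count));
}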
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index a7e25cb..adf4c66 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -221,7 +221,7 @@
       // Reduce state by 1.
       done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
       if (done && (cur_state - 1) == 0) {  // cas may fail due to noise?
-        if (num_pending_writers_ > 0 || num_pending_readers_ > 0) {
+        if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) {
           // Wake any exclusive waiters as there are now no readers.
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 2bc17bf..6f7f2c1 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -71,12 +71,12 @@
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, mutex)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakAcquire(0, mutex)) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(mutex_, 0)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakRelease(mutex_, 0)) {
       NanoSleep(100);
     }
   }
@@ -174,34 +174,34 @@
                                  uint64_t owner_tid,
                                  uint64_t nano_time_blocked) {
   if (kLogLockContentions) {
-    ContentionLogData* data = contetion_log_data_;
+    ContentionLogData* data = contention_log_data_;
     ++(data->contention_count);
     data->AddToWaitTime(nano_time_blocked);
     ContentionLogEntry* log = data->contention_log;
     // This code is intentionally racy as it is only used for diagnostics.
-    uint32_t slot = data->cur_content_log_entry;
+    uint32_t slot = data->cur_content_log_entry.LoadRelaxed();
     if (log[slot].blocked_tid == blocked_tid &&
         log[slot].owner_tid == blocked_tid) {
       ++log[slot].count;
     } else {
       uint32_t new_slot;
       do {
-        slot = data->cur_content_log_entry;
+        slot = data->cur_content_log_entry.LoadRelaxed();
         new_slot = (slot + 1) % kContentionLogSize;
-      } while (!data->cur_content_log_entry.CompareAndSwap(slot, new_slot));
+      } while (!data->cur_content_log_entry.CompareExchangeWeakRelaxed(slot, new_slot));
       log[new_slot].blocked_tid = blocked_tid;
       log[new_slot].owner_tid = owner_tid;
-      log[new_slot].count = 1;
+      log[new_slot].count.StoreRelaxed(1);
     }
   }
 }
 
 void BaseMutex::DumpContention(std::ostream& os) const {
   if (kLogLockContentions) {
-    const ContentionLogData* data = contetion_log_data_;
+    const ContentionLogData* data = contention_log_data_;
     const ContentionLogEntry* log = data->contention_log;
     uint64_t wait_time = data->wait_time;
-    uint32_t contention_count = data->contention_count;
+    uint32_t contention_count = data->contention_count.LoadRelaxed();
     if (contention_count == 0) {
       os << "never contended";
     } else {
@@ -213,7 +213,7 @@
       for (size_t i = 0; i < kContentionLogSize; ++i) {
         uint64_t blocked_tid = log[i].blocked_tid;
         uint64_t owner_tid = log[i].owner_tid;
-        uint32_t count = log[i].count;
+        uint32_t count = log[i].count.LoadRelaxed();
         if (count > 0) {
           auto it = most_common_blocked.find(blocked_tid);
           if (it != most_common_blocked.end()) {
@@ -261,7 +261,7 @@
 #if ART_USE_FUTEXES
   state_ = 0;
   exclusive_owner_ = 0;
-  num_contenders_ = 0;
+  DCHECK_EQ(0, num_contenders_.LoadRelaxed());
 #elif defined(__BIONIC__) || defined(__APPLE__)
   // Use recursive mutexes for bionic and Apple otherwise the
   // non-recursive mutexes don't have TIDs to check lock ownership of.
@@ -283,7 +283,8 @@
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
-    CHECK_EQ(num_contenders_, 0) << "unexpectedly found a contender on mutex " << name_;
+    CHECK_EQ(num_contenders_.LoadRelaxed(), 0)
+        << "unexpectedly found a contender on mutex " << name_;
   }
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -406,7 +407,7 @@
       done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
       if (LIKELY(done)) {  // Spurious fail?
         // Wake a contender
-        if (UNLIKELY(num_contenders_ > 0)) {
+        if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
           futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
         }
       }
@@ -459,7 +460,7 @@
   CHECK_EQ(state_, 0);
   CHECK_EQ(exclusive_owner_, 0U);
   CHECK_EQ(num_pending_readers_, 0);
-  CHECK_EQ(num_pending_writers_, 0);
+  CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
   // may still be using locks.
@@ -523,7 +524,7 @@
       done =  __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */);
       if (LIKELY(done)) {  // cmpxchg may fail due to noise?
         // Wake any waiters.
-        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) {
+        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) {
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
@@ -646,7 +647,7 @@
 ConditionVariable::ConditionVariable(const char* name, Mutex& guard)
     : name_(name), guard_(guard) {
 #if ART_USE_FUTEXES
-  sequence_ = 0;
+  DCHECK_EQ(0, sequence_.LoadRelaxed());
   num_waiters_ = 0;
 #else
   pthread_condattr_t cond_attrs;
@@ -691,7 +692,7 @@
     sequence_++;  // Indicate the broadcast occurred.
     bool done = false;
     do {
-      int32_t cur_sequence = sequence_;
+      int32_t cur_sequence = sequence_.LoadRelaxed();
       // Requeue waiters onto mutex. The waiter holds the contender count on the mutex high ensuring
       // mutex unlocks will awaken the requeued waiter thread.
       done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
@@ -740,7 +741,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_;
+  int32_t cur_sequence = sequence_.LoadRelaxed();
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, NULL, NULL, 0) != 0) {
     // Futex failed, check it is an expected error.
@@ -754,7 +755,7 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_, 0);
+  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
   guard_.recursion_count_ = 0;
@@ -775,7 +776,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_;
+  int32_t cur_sequence = sequence_.LoadRelaxed();
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, &rel_ts, NULL, 0) != 0) {
     if (errno == ETIMEDOUT) {
@@ -790,7 +791,7 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_, 0);
+  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
 #if !defined(__APPLE__)
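
The ScopedAllMutexesLock hunk above replaces the unordered CompareAndSwap with an acquire CAS on lock and a release CAS on unlock, so writes made while holding the guard become visible to the next owner. The idiom, sketched with Atomic<const void*> standing in for the guard's actual type:

// Sketch of the spin-lock idiom; NanoSleep is the back-off used in the hunk.
void SpinLock(art::Atomic<const void*>* guard, const void* owner) {
  while (!guard->CompareExchangeWeakAcquire(nullptr, owner)) {
    NanoSleep(100);  // Contended or spurious weak-CAS failure: back off and retry.
  }
}

void SpinUnlock(art::Atomic<const void*>* guard, const void* owner) {
  while (!guard->CompareExchangeWeakRelease(owner, nullptr)) {
    NanoSleep(100);  // Only fails spuriously here; owner still holds the guard.
  }
}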
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3f35670..e13c8d5 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -160,12 +160,12 @@
     void AddToWaitTime(uint64_t value);
     ContentionLogData() : wait_time(0) {}
   };
-  ContentionLogData contetion_log_data_[kContentionLogDataSize];
+  ContentionLogData contention_log_data_[kContentionLogDataSize];
 
  public:
   bool HasEverContended() const {
     if (kLogLockContentions) {
-      return contetion_log_data_->contention_count > 0;
+      return contention_log_data_->contention_count.LoadSequentiallyConsistent() > 0;
     }
     return false;
   }
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 67e8c22..1d0b866 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -18,11 +18,10 @@
 #define ART_RUNTIME_BASE_UNIX_FILE_RANDOM_ACCESS_FILE_TEST_H_
 
 #include <errno.h>
-
+#include <memory>
 #include <string>
 
 #include "common_runtime_test.h"
-#include "UniquePtrCompat.h"
 
 namespace unix_file {
 
@@ -62,7 +61,7 @@
 
   void TestRead() {
     char buf[256];
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
 
     // Reading from the start of an empty file gets you zero bytes, however many
     // you ask for.
@@ -77,7 +76,7 @@
 
   void TestReadContent(const std::string& content, RandomAccessFile* file) {
     const int buf_size = content.size() + 10;
-    UniquePtr<char> buf(new char[buf_size]);
+    std::unique_ptr<char[]> buf(new char[buf_size]);  // char[] so the array form of delete runs.
     // Can't read from a negative offset.
     ASSERT_EQ(-EINVAL, file->Read(buf.get(), 0, -123));
 
@@ -107,7 +106,7 @@
 
   void TestSetLength() {
     const std::string content("hello");
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
     ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
     ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
 
@@ -132,7 +131,7 @@
 
   void TestWrite() {
     const std::string content("hello");
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
 
     // Can't write to a negative offset.
     ASSERT_EQ(-EINVAL, file->Write(content.data(), 0, -123));
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 363e8b2..c7302b5 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -19,8 +19,8 @@
 #include <fcntl.h>
 #include <sys/file.h>
 #include <sys/stat.h>
-
 #include <deque>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -62,7 +62,6 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
@@ -207,7 +206,8 @@
   heap->IncrementDisableMovingGC(self);
   StackHandleScope<64> hs(self);  // 64 is picked arbitrarily.
   Handle<mirror::Class> java_lang_Class(hs.NewHandle(down_cast<mirror::Class*>(
-      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass), VoidFunctor()))));
+      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass),
+                                        VoidFunctor()))));
   CHECK(java_lang_Class.Get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.Get());
   java_lang_Class->SetClass(java_lang_Class.Get());
@@ -237,15 +237,18 @@
   object_array_class->SetComponentType(java_lang_Object.Get());
 
   // Setup the char class to be used for char[].
-  Handle<mirror::Class> char_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> char_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                           sizeof(mirror::Class))));
 
   // Setup the char[] class to be used for String.
-  Handle<mirror::Class> char_array_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  Handle<mirror::Class> char_array_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                                 sizeof(mirror::Class))));
   char_array_class->SetComponentType(char_class.Get());
   mirror::CharArray::SetArrayClass(char_array_class.Get());
 
   // Setup String.
-  Handle<mirror::Class> java_lang_String(hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::StringClass))));
+  Handle<mirror::Class> java_lang_String(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                                 sizeof(mirror::StringClass))));
   mirror::String::SetClass(java_lang_String.Get());
   java_lang_String->SetObjectSize(sizeof(mirror::String));
   java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
@@ -290,7 +293,8 @@
   java_lang_DexCache->SetObjectSize(sizeof(mirror::DexCache));
   java_lang_DexCache->SetStatus(mirror::Class::kStatusResolved, self);
 
-  // Constructor, Field, Method, and AbstractMethod are necessary so that FindClass can link members.
+  // Constructor, Field, Method, and AbstractMethod are necessary so
+  // that FindClass can link members.
   Handle<mirror::Class> java_lang_reflect_ArtField(
       hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::ArtFieldClass))));
   CHECK(java_lang_reflect_ArtField.Get() != NULL);
@@ -424,7 +428,8 @@
   mirror::Class* Art_field_class = FindSystemClass(self, "Ljava/lang/reflect/ArtField;");
   CHECK_EQ(java_lang_reflect_ArtField.Get(), Art_field_class);
 
-  mirror::Class* String_array_class = FindSystemClass(self, class_roots_descriptors_[kJavaLangStringArrayClass]);
+  mirror::Class* String_array_class =
+      FindSystemClass(self, class_roots_descriptors_[kJavaLangStringArrayClass]);
   CHECK_EQ(object_array_string.Get(), String_array_class);
 
   mirror::Class* Art_method_array_class =
@@ -444,18 +449,22 @@
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   mirror::Class* java_lang_ref_Reference = FindSystemClass(self, "Ljava/lang/ref/Reference;");
   SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference);
-  mirror::Class* java_lang_ref_FinalizerReference = FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
+  mirror::Class* java_lang_ref_FinalizerReference =
+      FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
   java_lang_ref_FinalizerReference->SetAccessFlags(
       java_lang_ref_FinalizerReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsFinalizerReference);
-  mirror::Class* java_lang_ref_PhantomReference = FindSystemClass(self, "Ljava/lang/ref/PhantomReference;");
+  mirror::Class* java_lang_ref_PhantomReference =
+      FindSystemClass(self, "Ljava/lang/ref/PhantomReference;");
   java_lang_ref_PhantomReference->SetAccessFlags(
       java_lang_ref_PhantomReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsPhantomReference);
-  mirror::Class* java_lang_ref_SoftReference = FindSystemClass(self, "Ljava/lang/ref/SoftReference;");
+  mirror::Class* java_lang_ref_SoftReference =
+      FindSystemClass(self, "Ljava/lang/ref/SoftReference;");
   java_lang_ref_SoftReference->SetAccessFlags(
       java_lang_ref_SoftReference->GetAccessFlags() | kAccClassIsReference);
-  mirror::Class* java_lang_ref_WeakReference = FindSystemClass(self, "Ljava/lang/ref/WeakReference;");
+  mirror::Class* java_lang_ref_WeakReference =
+      FindSystemClass(self, "Ljava/lang/ref/WeakReference;");
   java_lang_ref_WeakReference->SetAccessFlags(
       java_lang_ref_WeakReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsWeakReference);
@@ -469,9 +478,11 @@
   // java.lang.StackTraceElement as a convenience.
   SetClassRoot(kJavaLangThrowable, FindSystemClass(self, "Ljava/lang/Throwable;"));
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
-  SetClassRoot(kJavaLangClassNotFoundException, FindSystemClass(self, "Ljava/lang/ClassNotFoundException;"));
+  SetClassRoot(kJavaLangClassNotFoundException,
+               FindSystemClass(self, "Ljava/lang/ClassNotFoundException;"));
   SetClassRoot(kJavaLangStackTraceElement, FindSystemClass(self, "Ljava/lang/StackTraceElement;"));
-  SetClassRoot(kJavaLangStackTraceElementArrayClass, FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
+  SetClassRoot(kJavaLangStackTraceElementArrayClass,
+               FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
   FinishInit(self);
@@ -557,7 +568,7 @@
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   std::string boot_image_option("--boot-image=");
-  boot_image_option += heap->GetImageSpace()->GetImageFilename();
+  boot_image_option += heap->GetImageSpace()->GetImageLocation();
 
   std::string dex_file_option("--dex-file=");
   dex_file_option += dex_filename;
@@ -627,8 +638,8 @@
   return FindOpenedOatFileFromDexLocation(dex_location, &dex_location_checksum);
 }
 
-const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(const char* dex_location,
-                                                             const uint32_t* const dex_location_checksum) {
+const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(
+    const char* dex_location, const uint32_t* const dex_location_checksum) {
   ReaderMutexLock mu(Thread::Current(), dex_lock_);
   for (size_t i = 0; i < oat_files_.size(); i++) {
     const OatFile* oat_file = oat_files_[i];
@@ -647,7 +658,7 @@
                                                      uint32_t dex_location_checksum,
                                                      const char* oat_location,
                                                      std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
                                             !Runtime::Current()->IsCompiler(),
                                             error_msg));
   if (oat_file.get() == nullptr) {
@@ -745,15 +756,16 @@
   }
 
  private:
-  UniquePtr<File> file_;
+  std::unique_ptr<File> file_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
 };
 
-const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(const char* dex_location,
-                                                              uint32_t dex_location_checksum,
-                                                              const char* oat_location,
-                                                              std::vector<std::string>* error_msgs) {
+const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(
+    const char* dex_location,
+    uint32_t dex_location_checksum,
+    const char* oat_location,
+    std::vector<std::string>* error_msgs) {
   // We play a locking game here so that if two different processes
  // race to generate (or worse, one tries to open a partially generated
  // file), we will be okay. This is actually common with apps that use
@@ -784,7 +796,7 @@
     error_msgs->push_back(error_msg);
     return nullptr;
   }
-  UniquePtr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
                                             !Runtime::Current()->IsCompiler(),
                                             &error_msg));
   if (oat_file.get() == nullptr) {
@@ -829,7 +841,7 @@
     image_oat_checksum = image_header.GetOatChecksum();
     image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
   } else {
-    UniquePtr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
+    std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
         image_space->GetImageLocation().c_str(), instruction_set));
     image_oat_checksum = image_header->GetOatChecksum();
     image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
@@ -838,7 +850,8 @@
   bool image_check = ((oat_header.GetImageFileLocationOatChecksum() == image_oat_checksum)
                       && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin));
 
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, &dex_location_checksum);
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
+                                                                    &dex_location_checksum);
   if (oat_dex_file == NULL) {
     *error_msg = StringPrintf("oat file '%s' does not contain contents for '%s' with checksum 0x%x",
                               oat_file->GetLocation().c_str(), dex_location, dex_location_checksum);
@@ -878,7 +891,7 @@
                                                             const char* dex_location,
                                                             std::string* error_msg,
                                                             bool* open_failed) {
-  UniquePtr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
+  std::unique_ptr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
   if (oat_file.get() == nullptr) {
     *open_failed = true;
     return nullptr;
@@ -913,9 +926,11 @@
   return dex_file;
 }
 
-const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_location,
-                                                                const uint32_t* const dex_location_checksum,
-                                                                std::vector<std::string>* error_msgs) {
+const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(
+    const char* dex_location,
+    const uint32_t* const dex_location_checksum,
+    InstructionSet isa,
+    std::vector<std::string>* error_msgs) {
   const OatFile* open_oat_file = FindOpenedOatFileFromDexLocation(dex_location,
                                                                   dex_location_checksum);
   if (open_oat_file != nullptr) {
@@ -930,8 +945,8 @@
   }
 
  // Look for an existing file next to the dex file. For example, for
-  // /foo/bar/baz.jar, look for /foo/bar/baz.odex.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(dex_location));
+  // /foo/bar/baz.jar, look for /foo/bar/<isa>/baz.odex.
+  std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
   bool open_failed;
   std::string error_msg;
   const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(odex_filename, dex_location,
@@ -1239,22 +1254,22 @@
   if (location.Get() == NULL) {
     return NULL;
   }
-  Handle<mirror::ObjectArray<mirror::String> >
+  Handle<mirror::ObjectArray<mirror::String>>
       strings(hs.NewHandle(AllocStringArray(self, dex_file.NumStringIds())));
   if (strings.Get() == NULL) {
     return NULL;
   }
-  Handle<mirror::ObjectArray<mirror::Class> >
+  Handle<mirror::ObjectArray<mirror::Class>>
       types(hs.NewHandle(AllocClassArray(self, dex_file.NumTypeIds())));
   if (types.Get() == NULL) {
     return NULL;
   }
-  Handle<mirror::ObjectArray<mirror::ArtMethod> >
+  Handle<mirror::ObjectArray<mirror::ArtMethod>>
       methods(hs.NewHandle(AllocArtMethodArray(self, dex_file.NumMethodIds())));
   if (methods.Get() == NULL) {
     return NULL;
   }
-  Handle<mirror::ObjectArray<mirror::ArtField> >
+  Handle<mirror::ObjectArray<mirror::ArtField>>
       fields(hs.NewHandle(AllocArtFieldArray(self, dex_file.NumFieldIds())));
   if (fields.Get() == NULL) {
     return NULL;
@@ -1294,9 +1309,9 @@
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   InitializeClassVisitor visitor(class_size);
-  mirror::Object* k =
-      kMovingClasses ? heap->AllocObject<true>(self, java_lang_Class, class_size, visitor)
-                     : heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
+  mirror::Object* k = (kMovingClasses) ?
+      heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
+      heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
   if (UNLIKELY(k == nullptr)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return nullptr;
@@ -1698,8 +1713,9 @@
 }
 
 // Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(mirror::ArtMethod* method, const void* quick_code,
-                             const void* portable_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+static bool NeedsInterpreter(
+    mirror::ArtMethod* method, const void* quick_code, const void* portable_code)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if ((quick_code == nullptr) && (portable_code == nullptr)) {
     // No code: need interpreter.
     // May return true for native code, in the case of generic JNI
@@ -2396,7 +2412,8 @@
 bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash;
+  for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
+       it != end && it->first == hash;
        ++it) {
     mirror::Class* klass = it->second;
     if (klass->GetClassLoader() == class_loader && descriptor == klass->GetDescriptor()) {
@@ -2590,9 +2607,10 @@
       VerifyClass(super);
     }
     if (!super->IsCompileTimeVerified()) {
-      std::string error_msg(StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
-                                         PrettyDescriptor(klass.Get()).c_str(),
-                                         PrettyDescriptor(super.Get()).c_str()));
+      std::string error_msg(
+          StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
+                       PrettyDescriptor(klass.Get()).c_str(),
+                       PrettyDescriptor(super.Get()).c_str()));
       LOG(ERROR) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
       Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException(nullptr)));
       if (cause.Get() != nullptr) {
@@ -2801,7 +2819,7 @@
 static void CheckProxyMethod(mirror::ArtMethod* method,
                              Handle<mirror::ArtMethod>& prototype);
 
-mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccess& soa, jstring name,
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
                                              jobjectArray interfaces, jobject loader,
                                              jobjectArray methods, jobjectArray throws) {
   Thread* self = soa.Self();
@@ -2873,7 +2891,8 @@
   size_t num_virtual_methods =
       soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods)->GetLength();
   {
-    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self, num_virtual_methods);
+    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self,
+                                                                           num_virtual_methods);
     if (UNLIKELY(virtuals == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2894,21 +2913,23 @@
   }
 
   klass->SetSuperClass(proxy_class);  // The super class is java.lang.reflect.Proxy
-  klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Class is now effectively in the loaded state
+  klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
   self->AssertNoPendingException();
 
   {
     ObjectLock<mirror::Class> lock(self, klass);  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
-    Handle<mirror::ObjectArray<mirror::Class> > h_interfaces(
+    Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
     if (!LinkClass(self, klass, h_interfaces)) {
       klass->SetStatus(mirror::Class::kStatusError, self);
       return nullptr;
     }
 
-    interfaces_sfield->SetObject<false>(klass.Get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    throws_sfield->SetObject<false>(klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+    interfaces_sfield->SetObject<false>(
+        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    throws_sfield->SetObject<false>(
+        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
     klass->SetStatus(mirror::Class::kStatusInitialized, self);
   }
 
@@ -2935,8 +2956,10 @@
 
     mirror::SynthesizedProxyClass* synth_proxy_class =
         down_cast<mirror::SynthesizedProxyClass*>(klass.Get());
-    CHECK_EQ(synth_proxy_class->GetInterfaces(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    CHECK_EQ(synth_proxy_class->GetThrows(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+    CHECK_EQ(synth_proxy_class->GetInterfaces(),
+             soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    CHECK_EQ(synth_proxy_class->GetThrows(),
+             soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
   }
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), Hash(descriptor.c_str()));
@@ -3188,7 +3211,8 @@
         // The super class was verified ahead of entering initializing, we should only be here if
         // the super class became erroneous due to initialization.
         CHECK(handle_scope_super->IsErroneous() && self->IsExceptionPending())
-            << "Super class initialization failed for " << PrettyDescriptor(handle_scope_super.Get())
+            << "Super class initialization failed for "
+            << PrettyDescriptor(handle_scope_super.Get())
             << " that has unexpected status " << handle_scope_super->GetStatus()
             << "\nPending exception:\n"
             << (self->GetException(NULL) != NULL ? self->GetException(NULL)->Dump() : "");
@@ -3211,7 +3235,8 @@
                                        this, *dex_class_def);
     if (it.HasNext()) {
       CHECK(can_init_statics);
-      // We reordered the fields, so we need to be able to map the field indexes to the right fields.
+      // We reordered the fields, so we need to be able to map the
+      // field indexes to the right fields.
       SafeMap<uint32_t, mirror::ArtField*> field_map;
       ConstructFieldMap(dex_file, *dex_class_def, klass.Get(), field_map);
       for (size_t i = 0; it.HasNext(); i++, it.Next()) {
@@ -3374,7 +3399,7 @@
 }
 
 bool ClassLinker::LinkClass(Thread* self, const Handle<mirror::Class>& klass,
-                            const Handle<mirror::ObjectArray<mirror::Class> >& interfaces) {
+                            const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
   if (!LinkSuperClass(klass)) {
     return false;
@@ -3469,7 +3494,8 @@
     return false;
   }
 
-  // Inherit kAccClassIsFinalizable from the superclass in case this class doesn't override finalize.
+  // Inherit kAccClassIsFinalizable from the superclass in case this
+  // class doesn't override finalize.
   if (super->IsFinalizable()) {
     klass->SetFinalizable();
   }
@@ -3499,7 +3525,7 @@
 
 // Populate the class vtable and itable. Compute return type indices.
 bool ClassLinker::LinkMethods(const Handle<mirror::Class>& klass,
-                              const Handle<mirror::ObjectArray<mirror::Class> >& interfaces) {
+                              const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
     size_t count = klass->NumVirtualMethods();
@@ -3522,7 +3548,8 @@
 bool ClassLinker::LinkVirtualMethods(const Handle<mirror::Class>& klass) {
   Thread* self = Thread::Current();
   if (klass->HasSuperClass()) {
-    uint32_t max_count = klass->NumVirtualMethods() + klass->GetSuperClass()->GetVTable()->GetLength();
+    uint32_t max_count = (klass->NumVirtualMethods() +
+                          klass->GetSuperClass()->GetVTable()->GetLength());
     size_t actual_count = klass->GetSuperClass()->GetVTable()->GetLength();
     CHECK_LE(actual_count, max_count);
     // TODO: do not assign to the vtable field until it is fully constructed.
@@ -3542,7 +3569,8 @@
         mirror::ArtMethod* super_method = vtable->Get(j);
         MethodHelper super_mh(super_method);
         if (local_mh.HasSameNameAndSignature(&super_mh)) {
-          if (klass->CanAccessMember(super_method->GetDeclaringClass(), super_method->GetAccessFlags())) {
+          if (klass->CanAccessMember(super_method->GetDeclaringClass(),
+                                     super_method->GetAccessFlags())) {
             if (super_method->IsFinal()) {
               ThrowLinkageError(klass.Get(), "Method %s overrides final method in class %s",
                                 PrettyMethod(local_method).c_str(),
@@ -3588,7 +3616,7 @@
       return false;
     }
     StackHandleScope<1> hs(self);
-    Handle<mirror::ObjectArray<mirror::ArtMethod> >
+    Handle<mirror::ObjectArray<mirror::ArtMethod>>
         vtable(hs.NewHandle(AllocArtMethodArray(self, num_virtual_methods)));
     if (UNLIKELY(vtable.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
@@ -3604,8 +3632,9 @@
   return true;
 }
 
-bool ClassLinker::LinkInterfaceMethods(const Handle<mirror::Class>& klass,
-                                       const Handle<mirror::ObjectArray<mirror::Class> >& interfaces) {
+bool ClassLinker::LinkInterfaceMethods(
+    const Handle<mirror::Class>& klass,
+    const Handle<mirror::ObjectArray<mirror::Class>>& interfaces) {
   Thread* const self = Thread::Current();
   // Set the imt table to be all conflicts by default.
   klass->SetImTable(Runtime::Current()->GetDefaultImt());
@@ -3720,7 +3749,7 @@
   }
   // Allocate imtable
   bool imtable_changed = false;
-  Handle<mirror::ObjectArray<mirror::ArtMethod> > imtable(
+  Handle<mirror::ObjectArray<mirror::ArtMethod>> imtable(
       hs.NewHandle(AllocArtMethodArray(self, kImtSize)));
   if (UNLIKELY(imtable.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
@@ -3731,14 +3760,14 @@
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
     if (num_methods > 0) {
       StackHandleScope<2> hs(self);
-      Handle<mirror::ObjectArray<mirror::ArtMethod> >
+      Handle<mirror::ObjectArray<mirror::ArtMethod>>
           method_array(hs.NewHandle(AllocArtMethodArray(self, num_methods)));
       if (UNLIKELY(method_array.Get() == nullptr)) {
         CHECK(self->IsExceptionPending());  // OOME.
         return false;
       }
       iftable->SetMethodArray(i, method_array.Get());
-      Handle<mirror::ObjectArray<mirror::ArtMethod> > vtable(
+      Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
           hs.NewHandle(klass->GetVTableDuringLinking()));
       for (size_t j = 0; j < num_methods; ++j) {
         mirror::ArtMethod* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j);
@@ -3757,10 +3786,11 @@
           MethodHelper vtable_mh(vtable_method);
           if (interface_mh.HasSameNameAndSignature(&vtable_mh)) {
             if (!vtable_method->IsAbstract() && !vtable_method->IsPublic()) {
-              ThrowIllegalAccessError(klass.Get(),
-                                      "Method '%s' implementing interface method '%s' is not public",
-                                      PrettyMethod(vtable_method).c_str(),
-                                      PrettyMethod(interface_method).c_str());
+              ThrowIllegalAccessError(
+                  klass.Get(),
+                  "Method '%s' implementing interface method '%s' is not public",
+                  PrettyMethod(vtable_method).c_str(),
+                  PrettyMethod(interface_method).c_str());
               return false;
             }
             method_array->Set<false>(j, vtable_method);
@@ -3827,7 +3857,7 @@
     klass->SetVirtualMethods(virtuals);
 
     StackHandleScope<1> hs(self);
-    Handle<mirror::ObjectArray<mirror::ArtMethod> > vtable(
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
         hs.NewHandle(klass->GetVTableDuringLinking()));
     CHECK(vtable.Get() != NULL);
     int old_vtable_count = vtable->GetLength();
@@ -3886,8 +3916,10 @@
     if (type1 != type2) {
       bool is_primitive1 = type1 != Primitive::kPrimNot;
       bool is_primitive2 = type2 != Primitive::kPrimNot;
-      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
-      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
+      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong ||
+                                        type1 == Primitive::kPrimDouble);
+      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong ||
+                                        type2 == Primitive::kPrimDouble);
       int order1 = !is_primitive1 ? 0 : (is64bit1 ? 1 : 2);
       int order2 = !is_primitive2 ? 0 : (is64bit2 ? 1 : 2);
       if (order1 != order2) {
@@ -3917,20 +3949,21 @@
   } else {
     mirror::Class* super_class = klass->GetSuperClass();
     if (super_class != NULL) {
-      CHECK(super_class->IsResolved());
+      CHECK(super_class->IsResolved())
+          << PrettyClass(klass.Get()) << " " << PrettyClass(super_class);
       field_offset = MemberOffset(super_class->GetObjectSize());
     }
     size = field_offset.Uint32Value();
   }
 
-  CHECK_EQ(num_fields == 0, fields == NULL);
+  CHECK_EQ(num_fields == 0, fields == NULL) << PrettyClass(klass.Get());
 
  // We want a relatively stable order so that adding new fields
  // minimizes disruption of the C++ mirror classes such as Class and Method.
   std::deque<mirror::ArtField*> grouped_and_sorted_fields;
   for (size_t i = 0; i < num_fields; i++) {
     mirror::ArtField* f = fields->Get(i);
-    CHECK(f != NULL);
+    CHECK(f != NULL) << PrettyClass(klass.Get());
     grouped_and_sorted_fields.push_back(f);
   }
   std::sort(grouped_and_sorted_fields.begin(), grouped_and_sorted_fields.end(),
@@ -3962,7 +3995,7 @@
       mirror::ArtField* field = grouped_and_sorted_fields[i];
       FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
-      CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
+      CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
         continue;
       }
@@ -3978,13 +4011,14 @@
 
   // Alignment is good, shuffle any double-wide fields forward, and
   // finish assigning field offsets to all fields.
-  DCHECK(current_field == num_fields || IsAligned<8>(field_offset.Uint32Value()));
+  DCHECK(current_field == num_fields || IsAligned<8>(field_offset.Uint32Value()))
+      << PrettyClass(klass.Get());
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
     FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
-    CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
+    CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
     fields->Set<false>(current_field, field);
     field->SetOffset(field_offset);
     field_offset = MemberOffset(field_offset.Uint32Value() +
@@ -3998,9 +4032,9 @@
   if (!is_static && "Ljava/lang/ref/Reference;" == klass->GetDescriptor()) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
-    CHECK_EQ(num_reference_fields, num_fields);
+    CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
     FieldHelper fh(fields->Get(num_fields - 1));
-    CHECK_STREQ(fh.GetName(), "referent");
+    CHECK_STREQ(fh.GetName(), "referent") << PrettyClass(klass.Get());
     --num_reference_fields;
   }
 
@@ -4027,14 +4061,14 @@
       if (is_primitive) {
         if (!seen_non_ref) {
           seen_non_ref = true;
-          DCHECK_EQ(num_reference_fields, i);
+          DCHECK_EQ(num_reference_fields, i) << PrettyField(field);
         }
       } else {
-        DCHECK(!seen_non_ref);
+        DCHECK(!seen_non_ref) << PrettyField(field);
       }
     }
     if (!seen_non_ref) {
-      DCHECK_EQ(num_fields, num_reference_fields);
+      DCHECK_EQ(num_fields, num_reference_fields) << PrettyClass(klass.Get());
     }
   }
   size = field_offset.Uint32Value();
@@ -4049,7 +4083,7 @@
       size_t previous_size = klass->GetObjectSize();
       if (previous_size != 0) {
         // Make sure that we didn't originally have an incorrect size.
-        CHECK_EQ(previous_size, size);
+        CHECK_EQ(previous_size, size) << klass->GetDescriptor();
       }
       klass->SetObjectSize(size);
     }
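
// ---------------------------------------------------------------------------
// Illustrative sketch of the ScopedFlock "locking game" described in
// FindOrCreateOatFileForDexLocation above: take an exclusive flock(2) on the
// output path, then check that the path still names the inode we locked, so
// a racing process that unlinked and re-created the file cannot leave us
// holding a stale lock. Error handling is elided; names are local here.
// ---------------------------------------------------------------------------
#include <fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <unistd.h>

int LockOutputFile(const char* path) {
  while (true) {
    int fd = open(path, O_CREAT | O_RDWR, 0666);
    if (fd == -1) return -1;
    if (flock(fd, LOCK_EX) != 0) { close(fd); return -1; }
    struct stat fd_stat, path_stat;
    // If the file was replaced while we blocked in flock(), the inode under
    // our lock no longer matches the path: drop the lock and retry.
    if (fstat(fd, &fd_stat) == 0 && stat(path, &path_stat) == 0 &&
        fd_stat.st_dev == path_stat.st_dev && fd_stat.st_ino == path_stat.st_ino) {
      return fd;  // caller owns fd; close() releases the flock
    }
    close(fd);
  }
}
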
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index db780d9..54805be 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -46,7 +46,7 @@
 
 class InternTable;
 template<class T> class ObjectLock;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 template<class T> class Handle;
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
@@ -280,6 +280,7 @@
   // does not match the OatFile.
   const DexFile* FindDexFileInOatFileFromDexLocation(const char* location,
                                                      const uint32_t* const location_checksum,
+                                                     InstructionSet isa,
                                                      std::vector<std::string>* error_msgs)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
@@ -288,7 +289,7 @@
   static bool VerifyOatFileChecksums(const OatFile* oat_file,
                                      const char* dex_location,
                                      uint32_t dex_location_checksum,
-                                     const InstructionSet instruction_set,
+                                     InstructionSet instruction_set,
                                      std::string* error_msg);
 
   // TODO: replace this with multiple methods that allocate the correct managed type.
@@ -325,8 +326,9 @@
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
-                                  jobject loader, jobjectArray methods, jobjectArray throws)
+  mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
+                                  jobjectArray interfaces, jobject loader, jobjectArray methods,
+                                  jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -485,7 +487,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkClass(Thread* self, const Handle<mirror::Class>& klass,
-                 const Handle<mirror::ObjectArray<mirror::Class> >& interfaces)
+                 const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkSuperClass(const Handle<mirror::Class>& klass)
@@ -495,14 +497,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkMethods(const Handle<mirror::Class>& klass,
-                   const Handle<mirror::ObjectArray<mirror::Class> >& interfaces)
+                   const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkVirtualMethods(const Handle<mirror::Class>& klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkInterfaceMethods(const Handle<mirror::Class>& klass,
-                            const Handle<mirror::ObjectArray<mirror::Class> >& interfaces)
+                            const Handle<mirror::ObjectArray<mirror::Class>>& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkStaticFields(const Handle<mirror::Class>& klass)
@@ -569,7 +571,7 @@
   // Class::descriptor_ and Class::class_loader_.
   typedef std::multimap<size_t, mirror::Class*> Table;
   Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  std::vector<std::pair<size_t, mirror::Class*> > new_class_roots_;
+  std::vector<std::pair<size_t, mirror::Class*>> new_class_roots_;
 
   // Do we need to search dex caches to find image classes?
   bool dex_cache_image_class_lookup_required_;
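
// ---------------------------------------------------------------------------
// The "> >" to ">>" churn in this header is purely syntactic: C++11 lexes a
// closing ">>" inside template argument lists correctly, so the C++98
// workaround space is no longer needed. For example:
// ---------------------------------------------------------------------------
#include <vector>
std::vector<std::vector<int>> nested;  // C++98 required "std::vector<std::vector<int> >"
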
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index ff90f41..d04f02b 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -16,9 +16,9 @@
 
 #include "class_linker.h"
 
+#include <memory>
 #include <string>
 
-#include "UniquePtrCompat.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index d7a1667..bac212a 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -24,6 +24,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <fstream>
+#include <memory>
 
 #include "../../external/icu4c/common/unicode/uvernum.h"
 #include "base/macros.h"
@@ -47,7 +48,6 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utils.h"
-#include "UniquePtrCompat.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
@@ -75,9 +75,14 @@
     file_.reset(new File(fd, GetFilename()));
   }
 
+  explicit ScratchFile(File* file) {
+    CHECK(file != NULL);
+    filename_ = file->GetPath();
+    file_.reset(file);
+  }
+
   ~ScratchFile() {
-    int unlink_result = unlink(filename_.c_str());
-    CHECK_EQ(0, unlink_result);
+    Unlink();
   }
 
   const std::string& GetFilename() const {
@@ -92,9 +97,17 @@
     return file_->Fd();
   }
 
+  void Unlink() {
+    if (!OS::FileExists(filename_.c_str())) {
+      return;
+    }
+    int unlink_result = unlink(filename_.c_str());
+    CHECK_EQ(0, unlink_result);
+  }
+
  private:
   std::string filename_;
-  UniquePtr<File> file_;
+  std::unique_ptr<File> file_;
 };
 
 class CommonRuntimeTest : public testing::Test {
@@ -258,11 +271,7 @@
       filename += getenv("ANDROID_HOST_OUT");
       filename += "/framework/";
     } else {
-#ifdef __LP64__
-      filename += "/data/nativetest/art64/";
-#else
       filename += "/data/nativetest/art/";
-#endif
     }
     filename += "art-test-dex-";
     filename += name;
@@ -295,7 +304,7 @@
   std::string dalvik_cache_;
   const DexFile* java_lang_dex_file_;  // owned by runtime_
   std::vector<const DexFile*> boot_class_path_;
-  UniquePtr<Runtime> runtime_;
+  std::unique_ptr<Runtime> runtime_;
   // Owned by the runtime
   ClassLinker* class_linker_;
 
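
// ---------------------------------------------------------------------------
// Illustrative sketch of the ScratchFile change above: the destructor now
// delegates to an idempotent Unlink(), so a test may remove (or hand off)
// the file early without tripping the old CHECK that unlink() succeed in the
// destructor. A standalone equivalent (names local to this sketch):
// ---------------------------------------------------------------------------
#include <string>
#include <unistd.h>
#include <utility>

class TempFile {
 public:
  explicit TempFile(std::string path) : path_(std::move(path)) {}
  ~TempFile() { Unlink(); }

  void Unlink() {
    if (access(path_.c_str(), F_OK) != 0) {
      return;  // already gone; deleting twice is not an error
    }
    unlink(path_.c_str());
  }

 private:
  std::string path_;
};
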
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 2cbff79..7136c67 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -884,7 +884,7 @@
     std::vector<mirror::Object*> monitors;
     std::vector<uint32_t> stack_depths;
   };
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   OwnedMonitorVisitor visitor(thread, context.get());
   visitor.WalkStack();
 
@@ -2247,7 +2247,7 @@
       return JDWP::ERR_THREAD_NOT_SUSPENDED;
     }
   }
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   GetThisVisitor visitor(thread, context.get(), frame_id);
   visitor.WalkStack();
   *result = gRegistry->Add(visitor.this_object);
@@ -2395,7 +2395,7 @@
     return error;
   }
   // TODO check thread is suspended by the debugger ?
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   GetLocalVisitor visitor(soa, thread, context.get(), frame_id, slot, tag, buf, width);
   visitor.WalkStack();
   return visitor.error_;
@@ -2492,7 +2492,7 @@
     return error;
   }
   // TODO check thread is suspended by the debugger ?
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   SetLocalVisitor visitor(thread, context.get(), frame_id, slot, tag, value, width);
   visitor.WalkStack();
   return visitor.error_;
@@ -4275,7 +4275,7 @@
     for (const std::string& str : table_) {
       const char* s = str.c_str();
       size_t s_len = CountModifiedUtf8Chars(s);
-      UniquePtr<uint16_t> s_utf16(new uint16_t[s_len]);
+      std::unique_ptr<uint16_t[]> s_utf16(new uint16_t[s_len]);
       ConvertModifiedUtf8ToUtf16(s_utf16.get(), s);
       JDWP::AppendUtf16BE(bytes, s_utf16.get(), s_len);
     }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 26b7d07..43ae308 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -23,6 +23,7 @@
 #include <string.h>
 #include <sys/file.h>
 #include <sys/stat.h>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
@@ -39,7 +40,6 @@
 #include "ScopedFd.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 #include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
@@ -93,12 +93,12 @@
     return false;
   }
   if (IsZipMagic(magic)) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
     if (zip_archive.get() == NULL) {
       *error_msg = StringPrintf("Failed to open zip archive '%s'", filename);
       return false;
     }
-    UniquePtr<ZipEntry> zip_entry(zip_archive->Find(kClassesDex, error_msg));
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kClassesDex, error_msg));
     if (zip_entry.get() == NULL) {
       *error_msg = StringPrintf("Zip archive '%s' doesn't contain %s (error msg: %s)", filename,
                                 kClassesDex, error_msg->c_str());
@@ -108,7 +108,7 @@
     return true;
   }
   if (IsDexMagic(magic)) {
-    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
     if (dex_file.get() == NULL) {
       return false;
     }
@@ -171,7 +171,7 @@
 const DexFile* DexFile::OpenFile(int fd, const char* location, bool verify,
                                  std::string* error_msg) {
   CHECK(location != nullptr);
-  UniquePtr<MemMap> map;
+  std::unique_ptr<MemMap> map;
   {
     ScopedFd delayed_close(fd);
     struct stat sbuf;
@@ -218,7 +218,7 @@
 const char* DexFile::kClassesDex = "classes.dex";
 
 const DexFile* DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg) {
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
   if (zip_archive.get() == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
@@ -241,17 +241,17 @@
 const DexFile* DexFile::Open(const ZipArchive& zip_archive, const std::string& location,
                              std::string* error_msg) {
   CHECK(!location.empty());
-  UniquePtr<ZipEntry> zip_entry(zip_archive.Find(kClassesDex, error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(kClassesDex, error_msg));
   if (zip_entry.get() == NULL) {
     return nullptr;
   }
-  UniquePtr<MemMap> map(zip_entry->ExtractToMemMap(kClassesDex, error_msg));
+  std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(kClassesDex, error_msg));
   if (map.get() == NULL) {
     *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", kClassesDex, location.c_str(),
                               error_msg->c_str());
     return nullptr;
   }
-  UniquePtr<const DexFile> dex_file(OpenMemory(location, zip_entry->GetCrc32(), map.release(),
+  std::unique_ptr<const DexFile> dex_file(OpenMemory(location, zip_entry->GetCrc32(), map.release(),
                                                error_msg));
   if (dex_file.get() == nullptr) {
     *error_msg = StringPrintf("Failed to open dex file '%s' from memory: %s", location.c_str(),
@@ -276,7 +276,7 @@
                                    uint32_t location_checksum,
                                    MemMap* mem_map, std::string* error_msg) {
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
-  UniquePtr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
+  std::unique_ptr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
   if (!dex_file->Init(error_msg)) {
     return nullptr;
   } else {
@@ -838,7 +838,7 @@
                               void* context) const {
   DCHECK(code_item != nullptr);
   const byte* stream = GetDebugInfoStream(code_item);
-  UniquePtr<LocalInfo[]> local_in_reg(local_cb != NULL ?
+  std::unique_ptr<LocalInfo[]> local_in_reg(local_cb != NULL ?
                                       new LocalInfo[code_item->registers_size_] :
                                       NULL);
   if (stream != NULL) {
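
// ---------------------------------------------------------------------------
// Illustrative sketch of the container dispatch used by the dex_file.cc code
// above: sniff the leading magic bytes and route to the zip path or the raw
// dex path. The byte patterns are the standard zip local-file-header
// signature and the dex file magic; the helper names are local to this sketch.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstring>

static bool IsZipMagicSketch(const uint8_t* m) {
  return m[0] == 'P' && m[1] == 'K' && m[2] == 0x03 && m[3] == 0x04;
}

static bool IsDexMagicSketch(const uint8_t* m) {
  return std::memcmp(m, "dex\n", 4) == 0;  // full magic is "dex\n035\0"
}

enum class Container { kZip, kRawDex, kUnknown };

Container Classify(const uint8_t magic[4]) {
  if (IsZipMagicSketch(magic)) return Container::kZip;
  if (IsDexMagicSketch(magic)) return Container::kRawDex;
  return Container::kUnknown;
}
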
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 0146f31..1d5032d 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_DEX_FILE_H_
 #define ART_RUNTIME_DEX_FILE_H_
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -28,7 +29,6 @@
 #include "mem_map.h"
 #include "modifiers.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -870,7 +870,7 @@
   const uint32_t location_checksum_;
 
   // Manages the underlying memory allocation.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Points to the header section.
   const Header* const header_;
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 86c282e..a814c34 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -16,7 +16,8 @@
 
 #include "dex_file.h"
 
-#include "UniquePtrCompat.h"
+#include <memory>
+
 #include "common_runtime_test.h"
 
 namespace art {
@@ -90,7 +91,7 @@
     *dst_size = 0;
     return nullptr;
   }
-  UniquePtr<byte[]> dst(new byte[tmp.size()]);
+  std::unique_ptr<byte[]> dst(new byte[tmp.size()]);
   if (dst_size != nullptr) {
     *dst_size = tmp.size();
   } else {
@@ -131,11 +132,11 @@
   // decode base64
   CHECK(base64 != NULL);
   size_t length;
-  UniquePtr<byte[]> dex_bytes(DecodeBase64(base64, &length));
+  std::unique_ptr<byte[]> dex_bytes(DecodeBase64(base64, &length));
   CHECK(dex_bytes.get() != NULL);
 
   // write to provided file
-  UniquePtr<File> file(OS::CreateEmptyFile(location));
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
   CHECK(file.get() != NULL);
   if (!file->WriteFully(dex_bytes.get(), length)) {
     PLOG(FATAL) << "Failed to write base64 as dex file";
@@ -154,7 +155,7 @@
 
 TEST_F(DexFileTest, Header) {
   ScratchFile tmp;
-  UniquePtr<const DexFile> raw(OpenDexFileBase64(kRawDex, tmp.GetFilename().c_str()));
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex, tmp.GetFilename().c_str()));
   ASSERT_TRUE(raw.get() != NULL);
 
   const DexFile::Header& header = raw->GetHeader();
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index d179c8b..a1c8c71 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -17,12 +17,12 @@
 #include "dex_file_verifier.h"
 
 #include <zlib.h>
+#include <memory>
 
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "leb128.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 #include "utf-inl.h"
 #include "utils.h"
 
@@ -68,7 +68,7 @@
 
 bool DexFileVerifier::Verify(const DexFile* dex_file, const byte* begin, size_t size,
                              const char* location, std::string* error_msg) {
-  UniquePtr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
+  std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
   if (!verifier->Verify()) {
     *error_msg = verifier->FailureReason();
     return false;
@@ -652,7 +652,7 @@
     return false;
   }
 
-  UniquePtr<uint32_t[]> handler_offsets(new uint32_t[handlers_size]);
+  std::unique_ptr<uint32_t[]> handler_offsets(new uint32_t[handlers_size]);
   if (!CheckAndGetHandlerOffsets(code_item, &handler_offsets[0], handlers_size)) {
     return false;
   }
diff --git a/runtime/dex_instruction_visitor_test.cc b/runtime/dex_instruction_visitor_test.cc
index 99ad3ed..c5e63eb 100644
--- a/runtime/dex_instruction_visitor_test.cc
+++ b/runtime/dex_instruction_visitor_test.cc
@@ -17,8 +17,8 @@
 #include "dex_instruction_visitor.h"
 
 #include <iostream>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -26,7 +26,7 @@
 class TestVisitor : public DexInstructionVisitor<TestVisitor> {};
 
 TEST(InstructionTest, Init) {
-  UniquePtr<TestVisitor> visitor(new TestVisitor);
+  std::unique_ptr<TestVisitor> visitor(new TestVisitor);
 }
 
 class CountVisitor : public DexInstructionVisitor<CountVisitor> {
diff --git a/runtime/dex_method_iterator.h b/runtime/dex_method_iterator.h
index 1975e48..806266d 100644
--- a/runtime/dex_method_iterator.h
+++ b/runtime/dex_method_iterator.h
@@ -140,7 +140,7 @@
   uint32_t class_def_index_;
   const DexFile::ClassDef* class_def_;
   const byte* class_data_;
-  UniquePtr<ClassDataItemIterator> it_;
+  std::unique_ptr<ClassDataItemIterator> it_;
   bool direct_method_;
 };
 
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 01ca60f..5d20096 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -127,7 +127,7 @@
 
 ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only,
                        std::string* error_msg) {
-  UniquePtr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
+  std::unique_ptr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
   if (!elf_file->Setup(error_msg)) {
     return nullptr;
   }
@@ -844,7 +844,7 @@
     if (program_header.p_vaddr == 0) {
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
-      UniquePtr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
+      std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
                                                      NULL, GetLoadedSize(), PROT_NONE, false,
                                                      error_msg));
       if (reserve.get() == nullptr) {
@@ -884,7 +884,7 @@
                                 file_->GetPath().c_str());
       return false;
     }
-    UniquePtr<MemMap> segment(MemMap::MapFileAtAddress(p_vaddr,
+    std::unique_ptr<MemMap> segment(MemMap::MapFileAtAddress(p_vaddr,
                                                        program_header.p_memsz,
                                                        prot, flags, file_->Fd(),
                                                        program_header.p_offset,
@@ -999,7 +999,7 @@
 
   // Well, we need the whole file to do this.
   std::string error_msg;
-  UniquePtr<ElfFile> ptr(Open(const_cast<File*>(file_), false, false, &error_msg));
+  std::unique_ptr<ElfFile> ptr(Open(const_cast<File*>(file_), false, false, &error_msg));
   ElfFile& all = *ptr;
 
   // Do we have interesting sections?
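
// ---------------------------------------------------------------------------
// Illustrative sketch of the reservation trick in the elf_file.cc hunk above:
// for a position-independent image (first p_vaddr == 0), reserve the whole
// loaded size with an inaccessible anonymous mapping, then place each segment
// over it with MAP_FIXED so segments keep their relative offsets. Error
// handling is elided; names are local to this sketch.
// ---------------------------------------------------------------------------
#include <sys/mman.h>
#include <sys/types.h>
#include <cstddef>
#include <cstdint>

void* ReserveAndPlaceSegment(size_t loaded_size, size_t seg_offset,
                             size_t seg_size, int fd, off_t file_offset,
                             int prot) {
  // PROT_NONE keeps the reserved range unusable until real segments land.
  void* raw = mmap(nullptr, loaded_size, PROT_NONE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (raw == MAP_FAILED) return nullptr;
  uint8_t* base = static_cast<uint8_t*>(raw);
  // MAP_FIXED replaces the reserved pages at the segment's relative address.
  void* seg = mmap(base + seg_offset, seg_size, prot,
                   MAP_PRIVATE | MAP_FIXED, fd, file_offset);
  return seg == MAP_FAILED ? nullptr : seg;
}
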
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index 138147b..6650acd 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ELF_FILE_H_
 
 #include <map>
+#include <memory>
 #include <vector>
 
 #include "base/unix_file/fd_file.h"
@@ -25,7 +26,6 @@
 #include "elf_utils.h"
 #include "mem_map.h"
 #include "os.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -154,7 +154,7 @@
 
   // ELF header mapping. If program_header_only_ is false, will
   // actually point to the entire elf file.
-  UniquePtr<MemMap> map_;
+  std::unique_ptr<MemMap> map_;
   Elf32_Ehdr* header_;
   std::vector<MemMap*> segments_;
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index c81706f..39b2ec2 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -139,7 +139,7 @@
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_method_jobj,
                                     std::vector<jvalue>& args) {
   DCHECK(soa.Env()->IsInstanceOf(rcvr_jobj, WellKnownClasses::java_lang_reflect_Proxy));
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index bfcb58f..f1795a5 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -699,7 +699,7 @@
   }
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_art_method_jobj,
                                     std::vector<jvalue>& args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/entrypoints/portable/portable_jni_entrypoints.cc b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
index 17ad4d0..3e7b30a 100644
--- a/runtime/entrypoints/portable/portable_jni_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
@@ -37,7 +37,8 @@
   return art_portable_jni_method_start(self);
 }
 
-static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) {
+static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
   env->locals.SetSegmentState(env->local_ref_cookie);
   env->local_ref_cookie = saved_local_ref_cookie;
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 2d5c07d..c38a595 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -242,9 +242,11 @@
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
                                         Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  mirror::ArtMethod* referrer =
-      sp[callee_save->GetFrameSizeInBytes() / sizeof(mirror::ArtMethod*)];
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  uint32_t frame_size =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
+  mirror::ArtMethod* referrer = sp[frame_size / sizeof(mirror::ArtMethod*)];
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
   if (LIKELY(field != NULL  && obj != NULL)) {
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 60c5377..11a4b3b 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -48,10 +48,13 @@
   //       stack.
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
   *sp = callee_save;
+  uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
-                                                      callee_save->GetReturnPcOffsetInBytes());
+                                                      return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 9c9cca8..5d36b4c 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -61,7 +61,8 @@
   }
 }
 
-static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) {
+static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
   env->locals.SetSegmentState(env->local_ref_cookie);
   env->local_ref_cookie = saved_local_ref_cookie;
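
// ---------------------------------------------------------------------------
// Illustrative sketch of the annotation added to PopLocalReferences above.
// Assumption: SHARED_LOCKS_REQUIRED wraps Clang's thread-safety attributes,
// so -Wthread-safety flags callers that cannot prove they hold the mutator
// lock (shared). A minimal standalone equivalent:
// ---------------------------------------------------------------------------
#if defined(__clang__)
#define LOCKABLE __attribute__((lockable))
#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__)))
#else
#define LOCKABLE
#define SHARED_LOCKS_REQUIRED(...)
#endif

struct LOCKABLE FakeMutatorLock {};
FakeMutatorLock fake_mutator_lock;

// Clang warns at any call site that does not hold fake_mutator_lock.
void PopLocalReferencesSketch() SHARED_LOCKS_REQUIRED(fake_mutator_lock) {}
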
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 887bd6f..ee276c1 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -523,7 +523,7 @@
   ScopedObjectAccessUnchecked* const soa_;
   std::vector<jvalue>* const args_;
   // References which we must update when exiting in case the GC moved the objects.
-  std::vector<std::pair<jobject, StackReference<mirror::Object>*> > references_;
+  std::vector<std::pair<jobject, StackReference<mirror::Object>*>> references_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildQuickArgumentVisitor);
 };
@@ -640,7 +640,7 @@
  private:
   ScopedObjectAccessUnchecked* const soa_;
   // References which we must update when exiting in case the GC moved the objects.
-  std::vector<std::pair<jobject, StackReference<mirror::Object>*> > references_;
+  std::vector<std::pair<jobject, StackReference<mirror::Object>*>> references_;
   DISALLOW_COPY_AND_ASSIGN(RememberForGcArgumentVisitor);
 };
 
@@ -1602,15 +1602,72 @@
   }
 }
 
-template<InvokeType type, bool access_check>
-static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
-                                mirror::ArtMethod* caller_method,
-                                Thread* self, mirror::ArtMethod** sp);
+// The following definitions create return types for two word-sized entities that will be passed
+// in registers so that memory operations for the interface trampolines can be avoided. The entities
+// are the resolved method and the pointer to the code to be invoked.
+//
+// On x86, ARM32 and MIPS, register returns are only guaranteed for a *scalar* 64-bit value, so the
+// definition *must* be uint64_t or long long int. We use the upper 32 bits for the code pointer and
+// the lower 32 bits for the method pointer.
+//
+// On x86_64 and ARM64, structs are decomposed for register allocation, so we can create a struct of
+// two size_t-sized values.
+//
+// We need two operations:
+//
+// 1) A flag value that signals failure. The assembly stubs expect the method part to be "0".
+//    GetFailureValue() will return a value that has method == 0.
+//
+// 2) A value that combines a code pointer and a method pointer.
+//    GetSuccessValue() constructs this.
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+typedef uint64_t MethodAndCode;
+
+// Encodes method_ptr==nullptr and code_ptr==nullptr
+static constexpr MethodAndCode GetFailureValue() {
+  return 0;
+}
+
+// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
+static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
+  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
+  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
+  return ((code_uint << 32) | method_uint);
+}
+
+#elif defined(__x86_64__) || defined(__aarch64__)
+struct MethodAndCode {
+  uintptr_t method;
+  uintptr_t code;
+};
+
+// Encodes method_ptr==nullptr; the code pointer is left uninitialized.
+static MethodAndCode GetFailureValue() {
+  MethodAndCode ret;
+  ret.method = 0;
+  return ret;
+}
+
+// Write values into their respective members.
+static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
+  MethodAndCode ret;
+  ret.method = reinterpret_cast<uintptr_t>(method);
+  ret.code = reinterpret_cast<uintptr_t>(code);
+  return ret;
+}
+#else
+#error "Unsupported architecture"
+#endif
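
To see how the packed scalar variant is consumed, a 32-bit caller would split the 64-bit value back into its halves along the following lines (illustrative only; the real consumers are the assembly stubs):

    // Unpack a MethodAndCode produced by the 32-bit GetSuccessValue above.
    uint64_t packed = GetSuccessValue(code, method);
    mirror::ArtMethod* unpacked_method =
        reinterpret_cast<mirror::ArtMethod*>(static_cast<uint32_t>(packed));   // low 32 bits
    const void* unpacked_code =
        reinterpret_cast<const void*>(static_cast<uint32_t>(packed >> 32));    // high 32 bits
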
 
 template<InvokeType type, bool access_check>
-static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
-                                mirror::ArtMethod* caller_method,
-                                Thread* self, mirror::ArtMethod** sp) {
+static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+                                     mirror::ArtMethod* caller_method,
+                                     Thread* self, mirror::ArtMethod** sp);
+
+template<InvokeType type, bool access_check>
+static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+                                     mirror::ArtMethod* caller_method,
+                                     Thread* self, mirror::ArtMethod** sp) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
                                              type);
   if (UNLIKELY(method == nullptr)) {
@@ -1630,7 +1687,7 @@
 
     if (UNLIKELY(method == NULL)) {
       CHECK(self->IsExceptionPending());
-      return 0;  // failure
+      return GetFailureValue();  // Failure.
     }
   }
   DCHECK(!self->IsExceptionPending());
@@ -1639,24 +1696,17 @@
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << MethodHelper(method).GetDexFile().GetLocation();
-#ifdef __LP64__
-  UNIMPLEMENTED(FATAL);
-  return 0;
-#else
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  uint64_t result = ((code_uint << 32) | method_uint);
-  return result;
-#endif
+
+  return GetSuccessValue(code, method);
 }
 
 // Explicit artInvokeCommon template function declarations to please analysis tool.
 #define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check)                                \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
-  uint64_t artInvokeCommon<type, access_check>(uint32_t method_idx,                             \
-                                               mirror::Object* this_object,                     \
-                                               mirror::ArtMethod* caller_method,                \
-                                               Thread* self, mirror::ArtMethod** sp)            \
+  MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
+                                                    mirror::Object* this_object,                \
+                                                    mirror::ArtMethod* caller_method,           \
+                                                    Thread* self, mirror::ArtMethod** sp)       \
 
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false);
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true);
@@ -1672,57 +1722,57 @@
 
 
 // See comments in runtime_support_asm.S
-extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                mirror::Object* this_object,
-                                                                mirror::ArtMethod* caller_method,
-                                                                Thread* self,
-                                                                mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                     mirror::Object* this_object,
+                                                                     mirror::ArtMethod* caller_method,
+                                                                     Thread* self,
+                                                                     mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kInterface, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 
-extern "C" uint64_t artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
-                                                             mirror::Object* this_object,
-                                                             mirror::ArtMethod* caller_method,
-                                                             Thread* self,
-                                                             mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                  mirror::Object* this_object,
+                                                                  mirror::ArtMethod* caller_method,
+                                                                  Thread* self,
+                                                                  mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
-                                                             mirror::Object* this_object,
-                                                             mirror::ArtMethod* caller_method,
-                                                             Thread* self,
-                                                             mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                  mirror::Object* this_object,
+                                                                  mirror::ArtMethod* caller_method,
+                                                                  Thread* self,
+                                                                  mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
-                                                            mirror::Object* this_object,
-                                                            mirror::ArtMethod* caller_method,
-                                                            Thread* self,
-                                                            mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                 mirror::Object* this_object,
+                                                                 mirror::ArtMethod* caller_method,
+                                                                 Thread* self,
+                                                                 mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
-                                                              mirror::Object* this_object,
-                                                              mirror::ArtMethod* caller_method,
-                                                              Thread* self,
-                                                              mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                   mirror::Object* this_object,
+                                                                   mirror::ArtMethod* caller_method,
+                                                                   Thread* self,
+                                                                   mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 // Determine target of interface dispatch. This object is known non-null.
-extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
-                                                 mirror::Object* this_object,
-                                                 mirror::ArtMethod* caller_method,
-                                                 Thread* self, mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
+                                                      mirror::Object* this_object,
+                                                      mirror::ArtMethod* caller_method,
+                                                      Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method;
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
@@ -1731,7 +1781,7 @@
       FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
-      return 0;  // Failure.
+      return GetFailureValue();  // Failure.
     }
   } else {
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
@@ -1828,7 +1878,7 @@
 
     if (UNLIKELY(method == nullptr)) {
       CHECK(self->IsExceptionPending());
-      return 0;  // Failure.
+      return GetFailureValue();  // Failure.
     }
   }
   const void* code = method->GetEntryPointFromQuickCompiledCode();
@@ -1836,15 +1886,8 @@
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << MethodHelper(method).GetDexFile().GetLocation();
-#ifdef __LP64__
-  UNIMPLEMENTED(FATAL);
-  return 0;
-#else
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  uint64_t result = ((code_uint << 32) | method_uint);
-  return result;
-#endif
+
+  return GetSuccessValue(code, method);
 }
 
 }  // namespace art
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 37ad9e5..751cdb6 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <memory>
+
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -27,7 +29,6 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 #include "vmap_table.h"
 
 namespace art {
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 4d7fd0a..8d750c5 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -79,7 +79,7 @@
       return;
     }
   }
-  LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
+  VLOG(signals) << "Caught unknown SIGSEGV in ART fault handler";
   oldaction_.sa_sigaction(sig, info, context);
 }
 
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 7d8b584..979970c 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -18,12 +18,12 @@
 #define ART_RUNTIME_GC_ACCOUNTING_ATOMIC_STACK_H_
 
 #include <algorithm>
+#include <memory>
 #include <string>
 
 #include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
-#include "UniquePtrCompat.h"
 #include "mem_map.h"
 #include "utils.h"
 
@@ -36,7 +36,7 @@
  public:
   // Capacity is how many elements we can store in the stack.
   static AtomicStack* Create(const std::string& name, size_t capacity) {
-    UniquePtr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
+    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
     mark_stack->Init();
     return mark_stack.release();
   }
@@ -46,8 +46,8 @@
   void Reset() {
     DCHECK(mem_map_.get() != NULL);
     DCHECK(begin_ != NULL);
-    front_index_ = 0;
-    back_index_ = 0;
+    front_index_.StoreRelaxed(0);
+    back_index_.StoreRelaxed(0);
     debug_is_sorted_ = true;
     int result = madvise(begin_, sizeof(T) * capacity_, MADV_DONTNEED);
     if (result == -1) {
@@ -64,12 +64,12 @@
     }
     int32_t index;
     do {
-      index = back_index_;
+      index = back_index_.LoadRelaxed();
       if (UNLIKELY(static_cast<size_t>(index) >= capacity_)) {
         // Stack overflow.
         return false;
       }
-    } while (!back_index_.CompareAndSwap(index, index + 1));
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
     begin_[index] = value;
     return true;
   }
@@ -83,13 +83,13 @@
     int32_t index;
     int32_t new_index;
     do {
-      index = back_index_;
+      index = back_index_.LoadRelaxed();
       new_index = index + num_slots;
       if (UNLIKELY(static_cast<size_t>(new_index) >= capacity_)) {
         // Stack overflow.
         return false;
       }
-    } while (!back_index_.CompareAndSwap(index, new_index));
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, new_index));
     *start_address = &begin_[index];
     *end_address = &begin_[new_index];
     if (kIsDebugBuild) {
@@ -114,31 +114,31 @@
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
-    int32_t index = back_index_;
+    int32_t index = back_index_.LoadRelaxed();
     DCHECK_LT(static_cast<size_t>(index), capacity_);
-    back_index_ = index + 1;
+    back_index_.StoreRelaxed(index + 1);
     begin_[index] = value;
   }
 
   T PopBack() {
-    DCHECK_GT(back_index_, front_index_);
+    DCHECK_GT(back_index_.LoadRelaxed(), front_index_.LoadRelaxed());
     // Decrement the back index non atomically.
-    back_index_ = back_index_ - 1;
-    return begin_[back_index_];
+    back_index_.StoreRelaxed(back_index_.LoadRelaxed() - 1);
+    return begin_[back_index_.LoadRelaxed()];
   }
 
   // Take an item from the front of the stack.
   T PopFront() {
-    int32_t index = front_index_;
-    DCHECK_LT(index, back_index_.Load());
-    front_index_ = front_index_ + 1;
+    int32_t index = front_index_.LoadRelaxed();
+    DCHECK_LT(index, back_index_.LoadRelaxed());
+    front_index_.StoreRelaxed(index + 1);
     return begin_[index];
   }
 
   // Pop a number of elements.
   void PopBackCount(int32_t n) {
     DCHECK_GE(Size(), static_cast<size_t>(n));
-    back_index_.FetchAndSub(n);
+    back_index_.FetchAndSubSequentiallyConsistent(n);
   }
 
   bool IsEmpty() const {
@@ -146,16 +146,16 @@
   }
 
   size_t Size() const {
-    DCHECK_LE(front_index_, back_index_);
-    return back_index_ - front_index_;
+    DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed());
+    return back_index_.LoadRelaxed() - front_index_.LoadRelaxed();
   }
 
   T* Begin() const {
-    return const_cast<T*>(begin_ + front_index_);
+    return const_cast<T*>(begin_ + front_index_.LoadRelaxed());
   }
 
   T* End() const {
-    return const_cast<T*>(begin_ + back_index_);
+    return const_cast<T*>(begin_ + back_index_.LoadRelaxed());
   }
 
   size_t Capacity() const {
@@ -169,11 +169,11 @@
   }
 
   void Sort() {
-    int32_t start_back_index = back_index_.Load();
-    int32_t start_front_index = front_index_.Load();
+    int32_t start_back_index = back_index_.LoadRelaxed();
+    int32_t start_front_index = front_index_.LoadRelaxed();
     std::sort(Begin(), End());
-    CHECK_EQ(start_back_index, back_index_.Load());
-    CHECK_EQ(start_front_index, front_index_.Load());
+    CHECK_EQ(start_back_index, back_index_.LoadRelaxed());
+    CHECK_EQ(start_front_index, front_index_.LoadRelaxed());
     if (kIsDebugBuild) {
       debug_is_sorted_ = true;
     }
@@ -215,7 +215,7 @@
   std::string name_;
 
   // Memory mapping of the atomic stack.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Back index (index after the last element pushed).
   AtomicInteger back_index_;
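
Every access to back_index_ and front_index_ now spells out its memory ordering. The push fast path is a standard relaxed compare-and-swap loop; written directly against std::atomic it would look roughly like this (a sketch with stand-in names, not the ART implementation):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Lock-free push: reserve a slot with a weak CAS, then fill it.
    bool AtomicStackPush(std::atomic<int32_t>& back_index, size_t capacity,
                         uint32_t* slots, uint32_t value) {
      int32_t index = back_index.load(std::memory_order_relaxed);
      do {
        if (static_cast<size_t>(index) >= capacity) {
          return false;  // stack overflow
        }
        // compare_exchange_weak reloads 'index' on failure, so each retry sees
        // the freshly observed back index.
      } while (!back_index.compare_exchange_weak(index, index + 1,
                                                 std::memory_order_relaxed));
      slots[index] = value;
      return true;
    }
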
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 714e6f7..43a173e 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -55,7 +55,7 @@
   size_t capacity = heap_capacity / kCardSize;
   /* Allocate an extra 256 bytes to allow fixed low-byte of base */
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
                                                  capacity + 256, PROT_READ | PROT_WRITE,
                                                  false, &error_msg));
   CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg;
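
The biased begin that appears in the header change below exists so a heap address maps to its card with a single shift and add. A minimal sketch, assuming ART's usual 128-byte cards (the kCardShift value of 7 is an assumption here):

    #include <cstdint>

    // Map an object address to its card table entry via the biased begin.
    inline uint8_t* CardFromAddr(uint8_t* biased_begin, const void* addr) {
      constexpr unsigned kCardShift = 7;  // assumed: log2 of a 128-byte card
      return biased_begin + (reinterpret_cast<uintptr_t>(addr) >> kCardShift);
    }
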
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 17e62a6..7934974 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -17,10 +17,11 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 
+#include <memory>
+
 #include "base/mutex.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -141,7 +142,7 @@
   void VerifyCardTable();
 
   // Mmapped pages for the card table
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
   // Value used to compute card table addresses from object addresses, see GetBiasedBegin
   byte* const biased_begin_;
   // Card table doesn't begin at the beginning of the mem_map_, instead it is displaced by offset
diff --git a/runtime/gc/accounting/gc_allocator.h b/runtime/gc/accounting/gc_allocator.h
index 7dd7cca..1d96112 100644
--- a/runtime/gc/accounting/gc_allocator.h
+++ b/runtime/gc/accounting/gc_allocator.h
@@ -73,7 +73,7 @@
 // GCAllocatorImpl<T> if kMeasureGCMemoryOverhead is true, std::allocator<T> otherwise.
 template <typename T>
 class GcAllocator : public TypeStaticIf<kMeasureGcMemoryOverhead, GcAllocatorImpl<T>,
-                                        std::allocator<T> >::type {
+                                        std::allocator<T>>::type {
 };
 
 }  // namespace accounting
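
TypeStaticIf is a local spelling of compile-time type selection; with C++11 in play the same choice could be expressed with std::conditional, roughly as below (a sketch assuming the surrounding kMeasureGcMemoryOverhead flag and GcAllocatorImpl template):

    #include <memory>
    #include <type_traits>

    // Equivalent selection using the standard trait.
    template <typename T>
    using SelectedAllocator =
        typename std::conditional<kMeasureGcMemoryOverhead,
                                  GcAllocatorImpl<T>,
                                  std::allocator<T>>::type;
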
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index ef5653a..228d1dc 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -16,6 +16,8 @@
 
 #include "mod_union_table.h"
 
+#include <memory>
+
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
@@ -30,7 +32,6 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 
 using ::art::mirror::Object;
 
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 5ae7c77..449e171 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -50,7 +50,7 @@
 // cleared between GC phases, reducing the number of dirty cards that need to be scanned.
 class ModUnionTable {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*> > CardSet;
+  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
 
   explicit ModUnionTable(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name),
@@ -126,7 +126,7 @@
 
   // Maps from dirty cards to their corresponding alloc space references.
   SafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>, std::less<const byte*>,
-      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*> > > >
+      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>>>>
       references_;
 };
 
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 1def334..3ff5874 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -16,6 +16,8 @@
 
 #include "remembered_set.h"
 
+#include <memory>
+
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
@@ -30,7 +32,6 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index e3d8537..706cf35 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -43,7 +43,7 @@
 // from the free list spaces to the bump pointer spaces.
 class RememberedSet {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*> > CardSet;
+  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
 
   explicit RememberedSet(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name), heap_(heap), space_(space) {}
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index a439462..7f1da79 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -19,6 +19,8 @@
 
 #include "space_bitmap.h"
 
+#include <memory>
+
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "heap_bitmap.h"
@@ -28,7 +30,6 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 66f9a3a..8e817e5 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -51,7 +51,7 @@
   // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
                                                  PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
@@ -226,7 +226,7 @@
 
 template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::InOrderWalk(ObjectCallback* callback, void* arg) {
-  UniquePtr<SpaceBitmap<kAlignment>> visited(
+  std::unique_ptr<SpaceBitmap<kAlignment>> visited(
       Create("bitmap for in-order walk", reinterpret_cast<byte*>(heap_begin_),
              IndexToOffset(bitmap_size_ / kWordSize)));
   CHECK(bitmap_begin_ != nullptr);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 1ccebf5..50d15c6 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -17,17 +17,17 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 
+#include <limits.h>
+#include <stdint.h>
+#include <memory>
+#include <set>
+#include <vector>
+
 #include "base/mutex.h"
 #include "gc_allocator.h"
 #include "globals.h"
 #include "mem_map.h"
 #include "object_callbacks.h"
-#include "UniquePtrCompat.h"
-
-#include <limits.h>
-#include <set>
-#include <stdint.h>
-#include <vector>
 
 namespace art {
 
@@ -217,7 +217,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Backing storage for bitmap.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // This bitmap itself, word sized for efficiency in scanning.
   uword* const bitmap_begin_;
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index 71db44b..a30bb25 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -17,11 +17,11 @@
 #include "space_bitmap.h"
 
 #include <stdint.h>
+#include <memory>
 
 #include "common_runtime_test.h"
 #include "globals.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 namespace gc {
@@ -32,7 +32,7 @@
 TEST_F(SpaceBitmapTest, Init) {
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
-  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+  std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
       ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 }
@@ -60,7 +60,7 @@
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
 
-  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+  std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
       ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 
@@ -120,7 +120,7 @@
 
 
   for (int i = 0; i < 5 ; ++i) {
-    UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+    std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
         ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
 
     for (int j = 0; j < 10000; ++j) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 27c4c17..10b88b3 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -793,7 +793,7 @@
     // already in the non-full run set (i.e., it was full) insert it
     // into the non-full run set.
     if (run != current_runs_[idx]) {
-      unordered_set<Run*, hash_run, eq_run>* full_runs =
+      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
           kIsDebugBuild ? &full_runs_[idx] : NULL;
       std::set<Run*>::iterator pos = non_full_runs->find(run);
       if (pos == non_full_runs->end()) {
@@ -1160,7 +1160,7 @@
 #ifdef HAVE_ANDROID_OS
   std::vector<Run*> runs;
 #else
-  unordered_set<Run*, hash_run, eq_run> runs;
+  std::unordered_set<Run*, hash_run, eq_run> runs;
 #endif
   for (size_t i = 0; i < num_ptrs; i++) {
     void* ptr = ptrs[i];
@@ -1267,7 +1267,7 @@
       // Check if the run should be moved to non_full_runs_ or
       // free_page_runs_.
       std::set<Run*>* non_full_runs = &non_full_runs_[idx];
-      unordered_set<Run*, hash_run, eq_run>* full_runs =
+      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
           kIsDebugBuild ? &full_runs_[idx] : NULL;
       if (run->IsAllFree()) {
         // It has just become completely free. Free the pages of the
@@ -1281,7 +1281,7 @@
           // If it was full, remove it from the full run set (debug
           // only.)
           if (kIsDebugBuild) {
-            unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
             DCHECK(pos != full_runs->end());
             full_runs->erase(pos);
             if (kTraceRosAlloc) {
@@ -2054,7 +2054,7 @@
       } else {
         // If it's full, it must in the full run set (debug build only.)
         if (kIsDebugBuild) {
-          unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
           CHECK(full_runs.find(this) != full_runs.end())
               << " A full run isn't in the full run set " << Dump();
         }
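
The std::unordered_set<Run*, hash_run, eq_run> instantiations above need a hash and an equality functor for Run pointers; their assumed shape is roughly the following (illustrative, not the exact definitions from rosalloc.h):

    #include <cstddef>

    // Minimal functor pair for a set of runs keyed by address.
    struct hash_run {
      size_t operator()(const Run* r) const {
        return reinterpret_cast<size_t>(r);  // hash the pointer value itself
      }
    };
    struct eq_run {
      bool operator()(const Run* a, const Run* b) const {
        return a == b;
      }
    };
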
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 9ea4306..9464331 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -17,31 +17,21 @@
 #ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
 #define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
 
-#include <set>
 #include <stdint.h>
 #include <stdlib.h>
-#include <string>
 #include <sys/mman.h>
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "base/mutex.h"
 #include "base/logging.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtrCompat.h"
 #include "utils.h"
 
-// Ensure we have an unordered_set until we have worked out C++ library issues.
-#ifdef ART_WITH_STLPORT
-#include <hash_set>
-template <class V, class H, class P>
-class unordered_set : public std::hash_set<V, H, P> {};
-#else  // ART_WITH_STLPORT
-// TODO: avoid the use of using in a header file.
-#include <unordered_set>
-using std::unordered_set;
-#endif  // ART_WITH_STLPORT
-
 namespace art {
 namespace gc {
 namespace allocator {
@@ -451,7 +441,7 @@
   std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
   // The run sets that hold the runs whose slots are all full. This is
   // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
-  unordered_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  std::unordered_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
   // The set of free pages.
   std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
   // The dedicated full run, it is always full and shared by all threads when revoking happens.
@@ -479,7 +469,7 @@
   byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
   size_t page_map_size_;
   size_t max_page_map_size_;
-  UniquePtr<MemMap> page_map_mem_map_;
+  std::unique_ptr<MemMap> page_map_mem_map_;
 
   // The table that indicates the size of free page runs. These sizes
   // are stored here to avoid storing in the free page header and
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index cc258f5..43331c3 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -99,9 +99,10 @@
     : GarbageCollector(heap,
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
+      current_space_bitmap_(nullptr), mark_bitmap_(nullptr), mark_stack_(nullptr),
       gc_barrier_(new Barrier(0)),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
-      is_concurrent_(is_concurrent) {
+      is_concurrent_(is_concurrent), live_stack_freeze_size_(0) {
 }
 
 void MarkSweep::InitializePhase() {
@@ -109,19 +110,19 @@
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
-  class_count_ = 0;
-  array_count_ = 0;
-  other_count_ = 0;
-  large_object_test_ = 0;
-  large_object_mark_ = 0;
-  overhead_time_ = 0;
-  work_chunks_created_ = 0;
-  work_chunks_deleted_ = 0;
-  reference_count_ = 0;
-  mark_null_count_ = 0;
-  mark_immune_count_ = 0;
-  mark_fastpath_count_ = 0;
-  mark_slowpath_count_ = 0;
+  class_count_.StoreRelaxed(0);
+  array_count_.StoreRelaxed(0);
+  other_count_.StoreRelaxed(0);
+  large_object_test_.StoreRelaxed(0);
+  large_object_mark_.StoreRelaxed(0);
+  overhead_time_.StoreRelaxed(0);
+  work_chunks_created_.StoreRelaxed(0);
+  work_chunks_deleted_.StoreRelaxed(0);
+  reference_count_.StoreRelaxed(0);
+  mark_null_count_.StoreRelaxed(0);
+  mark_immune_count_.StoreRelaxed(0);
+  mark_fastpath_count_.StoreRelaxed(0);
+  mark_slowpath_count_.StoreRelaxed(0);
   {
     // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -596,7 +597,7 @@
         if (kUseFinger) {
           android_memory_barrier();
           if (reinterpret_cast<uintptr_t>(ref) >=
-              static_cast<uintptr_t>(mark_sweep_->atomic_finger_)) {
+              static_cast<uintptr_t>(mark_sweep_->atomic_finger_.LoadRelaxed())) {
             return;
           }
         }
@@ -881,7 +882,7 @@
           // This function does not handle heap end increasing, so we must use the space end.
           uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
           uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-          atomic_finger_ = static_cast<int32_t>(0xFFFFFFFF);
+          atomic_finger_.StoreRelaxed(AtomicInteger::MaxValue());
 
           // Create a few worker tasks.
           const size_t n = thread_count * 2;
@@ -1214,7 +1215,9 @@
   thread_pool->Wait(self, true, true);
   thread_pool->StopWorkers(self);
   mark_stack_->Reset();
-  CHECK_EQ(work_chunks_created_, work_chunks_deleted_) << " some of the work chunks were leaked";
+  CHECK_EQ(work_chunks_created_.LoadSequentiallyConsistent(),
+           work_chunks_deleted_.LoadSequentiallyConsistent())
+      << " some of the work chunks were leaked";
 }
 
 // Scan anything that's on the mark stack.
@@ -1269,24 +1272,27 @@
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   if (kCountScannedTypes) {
-    VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
-             << " other=" << other_count_;
+    VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed()
+        << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed();
   }
   if (kCountTasks) {
-    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_;
+    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_.LoadRelaxed();
   }
   if (kMeasureOverhead) {
-    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_);
+    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_.LoadRelaxed());
   }
   if (kProfileLargeObjects) {
-    VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
+    VLOG(gc) << "Large objects tested " << large_object_test_.LoadRelaxed()
+        << " marked " << large_object_mark_.LoadRelaxed();
   }
   if (kCountJavaLangRefs) {
-    VLOG(gc) << "References scanned " << reference_count_;
+    VLOG(gc) << "References scanned " << reference_count_.LoadRelaxed();
   }
   if (kCountMarkedObjects) {
-    VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" <<  mark_immune_count_
-        << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_;
+    VLOG(gc) << "Marked: null=" << mark_null_count_.LoadRelaxed()
+        << " immune=" <<  mark_immune_count_.LoadRelaxed()
+        << " fastpath=" << mark_fastpath_count_.LoadRelaxed()
+        << " slowpath=" << mark_slowpath_count_.LoadRelaxed();
   }
   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
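
The counter conversion in this file follows a common pattern: the hot marking paths bump statistics with relaxed atomic increments, and FinishPhase reads them back with relaxed loads once no mutators race with the reader. A compact sketch of the same idea in std::atomic terms:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint64_t> class_count{0};

    void OnClassScanned() {
      class_count.fetch_add(1, std::memory_order_relaxed);  // hot path, unordered
    }

    uint64_t ReportScannedClasses() {
      return class_count.load(std::memory_order_relaxed);   // read during FinishPhase
    }
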
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index fd79bf6..d73bf3f 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 
+#include <memory>
+
 #include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
@@ -26,7 +28,6 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -304,14 +305,14 @@
   AtomicInteger mark_fastpath_count_;
   AtomicInteger mark_slowpath_count_;
 
-  // Verification.
-  size_t live_stack_freeze_size_;
-
-  UniquePtr<Barrier> gc_barrier_;
+  std::unique_ptr<Barrier> gc_barrier_;
   Mutex mark_stack_lock_ ACQUIRED_AFTER(Locks::classlinker_classes_lock_);
 
   const bool is_concurrent_;
 
+  // Verification.
+  size_t live_stack_freeze_size_;
+
  private:
   friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
   friend class CardScanTask;
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 95a2c96..d4e26ab 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -590,6 +590,7 @@
     // If it's allocated after the last GC (younger), copy it to the to-space.
     forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
   }
+  CHECK(forward_address != nullptr) << "Out of memory in the to-space.";
   ++objects_moved_;
   bytes_moved_ += bytes_allocated;
   // Copy over the object and add it to the mark stack since we still need to update its
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index dacb5ae..a95abe4 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 #define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 
+#include <memory>
+
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -25,7 +27,6 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 7cee5a0..03b72b6 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -96,7 +96,7 @@
     CHECK_LE(obj->SizeOf(), usable_size);
   }
   const size_t new_num_bytes_allocated =
-      static_cast<size_t>(num_bytes_allocated_.FetchAndAdd(bytes_allocated)) + bytes_allocated;
+      static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) + bytes_allocated;
   // TODO: Deprecate.
   if (kInstrumented) {
     if (Runtime::Current()->HasStatsEnabled()) {
@@ -264,7 +264,7 @@
     // Only if the allocation succeeded, record the time.
     if (allocated_obj != nullptr) {
       uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
-      heap_->total_allocation_time_.FetchAndAdd(allocation_end_time - allocation_start_time_);
+      heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(allocation_end_time - allocation_start_time_);
     }
   }
 };
@@ -279,7 +279,7 @@
 
 template <bool kGrow>
 inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
-  size_t new_footprint = num_bytes_allocated_ + alloc_size;
+  size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
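
Note why the new total above is reconstructed by adding the operand after the FetchAndAdd: a fetch-and-add returns the value the counter held *before* the addition. In std::atomic terms (a sketch, not the ART Atomic API):

    #include <atomic>
    #include <cstddef>

    // Returns the counter value *after* recording an allocation.
    size_t RecordAllocation(std::atomic<size_t>& num_bytes_allocated, size_t bytes) {
      size_t old_total = num_bytes_allocated.fetch_add(bytes);  // yields the old value
      return old_total + bytes;                                 // hence the explicit "+ bytes"
    }
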
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ef31be3..ea1ccdd 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -20,6 +20,7 @@
 #include <cutils/trace.h>
 
 #include <limits>
+#include <memory>
 #include <vector>
 
 #include "base/histogram-inl.h"
@@ -64,7 +65,6 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread_list.h"
-#include "UniquePtrCompat.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -116,6 +116,7 @@
       long_pause_log_threshold_(long_pause_log_threshold),
       long_gc_log_threshold_(long_gc_log_threshold),
       ignore_max_footprint_(ignore_max_footprint),
+      zygote_creation_lock_("zygote creation lock", kZygoteCreationLock),
       have_zygote_space_(false),
       large_object_threshold_(std::numeric_limits<size_t>::max()),  // Starts out disabled.
       collector_type_running_(kCollectorTypeNone),
@@ -292,7 +293,7 @@
   }
 
   // TODO: Count objects in the image space here.
-  num_bytes_allocated_ = 0;
+  num_bytes_allocated_.StoreRelaxed(0);
 
   // Default mark stack size in bytes.
   static const size_t default_mark_stack_size = 64 * KB;
@@ -658,13 +659,13 @@
 
 void Heap::RegisterGCAllocation(size_t bytes) {
   if (this != nullptr) {
-    gc_memory_overhead_.FetchAndAdd(bytes);
+    gc_memory_overhead_.FetchAndAddSequentiallyConsistent(bytes);
   }
 }
 
 void Heap::RegisterGCDeAllocation(size_t bytes) {
   if (this != nullptr) {
-    gc_memory_overhead_.FetchAndSub(bytes);
+    gc_memory_overhead_.FetchAndSubSequentiallyConsistent(bytes);
   }
 }
 
@@ -699,7 +700,8 @@
     }
     collector->ResetMeasurements();
   }
-  uint64_t allocation_time = static_cast<uint64_t>(total_allocation_time_) * kTimeAdjust;
+  uint64_t allocation_time =
+      static_cast<uint64_t>(total_allocation_time_.LoadRelaxed()) * kTimeAdjust;
   if (total_duration != 0) {
     const double total_seconds = static_cast<double>(total_duration / 1000) / 1000000.0;
     os << "Total time spent in GC: " << PrettyDuration(total_duration) << "\n";
@@ -719,7 +721,7 @@
   }
   os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n";
   os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n";
-  os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_;
+  os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_.LoadRelaxed();
   BaseMutex::DumpAll(os);
 }
 
@@ -1021,7 +1023,7 @@
     return;
   }
   // Ignore early dawn of the universe verifications.
-  if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) {
+  if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) {
     return;
   }
   CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj;
@@ -1052,9 +1054,9 @@
   // Use signed comparison since freed bytes can be negative when background compaction foreground
   // transitions occurs. This is caused by the moving objects from a bump pointer space to a
   // free list backed space typically increasing memory footprint due to padding and binning.
-  DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.Load()));
+  DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.LoadRelaxed()));
   // Note: This relies on 2s complement for handling negative freed_bytes.
-  num_bytes_allocated_.FetchAndSub(static_cast<ssize_t>(freed_bytes));
+  num_bytes_allocated_.FetchAndSubSequentiallyConsistent(static_cast<ssize_t>(freed_bytes));
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
     thread_stats->freed_objects += freed_objects;
@@ -1312,7 +1314,7 @@
   VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  uint32_t before_allocated = num_bytes_allocated_.Load();
+  uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
@@ -1390,7 +1392,7 @@
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
-  int32_t after_allocated = num_bytes_allocated_.Load();
+  int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   int32_t delta_allocated = before_allocated - after_allocated;
   LOG(INFO) << "Heap transition to " << process_state_ << " took "
       << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated);
@@ -1551,7 +1553,6 @@
 
 void Heap::PreZygoteFork() {
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
-  static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
   // Try to see if we have any Zygote spaces.
@@ -1861,39 +1862,54 @@
 // Verify a reference from an object.
 class VerifyReferenceVisitor {
  public:
-  explicit VerifyReferenceVisitor(Heap* heap, bool verify_referent)
+  explicit VerifyReferenceVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
-      : heap_(heap), failed_(false), verify_referent_(verify_referent) {}
+      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {}
 
-  bool Failed() const {
-    return failed_;
+  size_t GetFailureCount() const {
+    return fail_count_->LoadSequentiallyConsistent();
   }
 
   void operator()(mirror::Class* klass, mirror::Reference* ref) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (verify_referent_) {
-      this->operator()(ref, mirror::Reference::ReferentOffset(), false);
+      VerifyReference(ref, ref->GetReferent(), mirror::Reference::ReferentOffset());
     }
   }
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    this->operator()(obj, obj->GetFieldObject<mirror::Object>(offset), offset);
+    VerifyReference(obj, obj->GetFieldObject<mirror::Object>(offset), offset);
   }
 
+  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    return heap_->IsLiveObjectLocked(obj, true, false, true);
+  }
+
+  static void VerifyRootCallback(mirror::Object** root, void* arg, uint32_t thread_id,
+                                 RootType root_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
+    if (!visitor->VerifyReference(nullptr, *root, MemberOffset(0))) {
+      LOG(ERROR) << "Root " << *root << " is dead with type " << PrettyTypeOf(*root)
+          << " thread_id= " << thread_id << " root_type= " << root_type;
+    }
+  }
+
+ private:
   // TODO: Fix the no thread safety analysis.
-  void operator()(mirror::Object* obj, mirror::Object* ref, MemberOffset offset) const
+  // Returns false on failure.
+  bool VerifyReference(mirror::Object* obj, mirror::Object* ref, MemberOffset offset) const
       NO_THREAD_SAFETY_ANALYSIS {
     if (ref == nullptr || IsLive(ref)) {
       // Verify that the reference is live.
-      return;
+      return true;
     }
-    if (!failed_) {
+    if (fail_count_->FetchAndAddSequentiallyConsistent(1) == 0) {
       // Print the message only on the first failure to prevent spam.
       LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
-      failed_ = true;
     }
     if (obj != nullptr) {
+      // Only do this part for non-roots.
       accounting::CardTable* card_table = heap_->GetCardTable();
       accounting::ObjectStack* alloc_stack = heap_->allocation_stack_.get();
       accounting::ObjectStack* live_stack = heap_->live_stack_.get();
@@ -1972,42 +1988,29 @@
       // Search to see if any of the roots reference our reference.
       arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
       Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
-    } else {
-      LOG(ERROR) << "Root " << ref << " is dead with type " << PrettyTypeOf(ref);
     }
+    return false;
   }
 
-  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    return heap_->IsLiveObjectLocked(obj, true, false, true);
-  }
-
-  static void VerifyRoots(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
-                          RootType /*root_type*/) {
-    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    (*visitor)(nullptr, *root, MemberOffset(0));
-  }
-
- private:
   Heap* const heap_;
-  mutable bool failed_;
-  bool verify_referent_;
+  Atomic<size_t>* const fail_count_;
+  const bool verify_referent_;
 };
 
 // Verify all references within an object, for use with HeapBitmap::Visit.
 class VerifyObjectVisitor {
  public:
-  explicit VerifyObjectVisitor(Heap* heap, bool verify_referent)
-      : heap_(heap), failed_(false), verify_referent_(verify_referent) {
+  explicit VerifyObjectVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
+      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {
   }
 
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // Note: we are verifying the references in obj but not obj itself, this is because obj must
     // be live or else how did we find it in the live bitmap?
-    VerifyReferenceVisitor visitor(heap_, verify_referent_);
+    VerifyReferenceVisitor visitor(heap_, fail_count_, verify_referent_);
     // The class doesn't count as a reference but we should verify it anyways.
     obj->VisitReferences<true>(visitor, visitor);
-    failed_ = failed_ || visitor.Failed();
   }
 
   static void VisitCallback(mirror::Object* obj, void* arg)
@@ -2016,18 +2019,18 @@
     visitor->operator()(obj);
   }
 
-  bool Failed() const {
-    return failed_;
+  size_t GetFailureCount() const {
+    return fail_count_->LoadSequentiallyConsistent();
   }
 
  private:
   Heap* const heap_;
-  mutable bool failed_;
+  Atomic<size_t>* const fail_count_;
   const bool verify_referent_;
 };
 
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
-bool Heap::VerifyHeapReferences(bool verify_referents) {
+size_t Heap::VerifyHeapReferences(bool verify_referents) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   // Lets sort our allocation stacks so that we can efficiently binary search them.
@@ -2036,7 +2039,8 @@
   // Since we sorted the allocation stack content, need to revoke all
   // thread-local allocation stacks.
   RevokeAllThreadLocalAllocationStacks(self);
-  VerifyObjectVisitor visitor(this, verify_referents);
+  Atomic<size_t> fail_count_(0);
+  VerifyObjectVisitor visitor(this, &fail_count_, verify_referents);
   // Verify objects in the allocation stack since these will be objects which were:
   // 1. Allocated prior to the GC (pre GC verification).
   // 2. Allocated during the GC (pre sweep GC verification).
@@ -2044,8 +2048,8 @@
   // pointing to dead objects if they are not reachable.
   VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor);
-  if (visitor.Failed()) {
+  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRootCallback, &visitor);
+  if (visitor.GetFailureCount() > 0) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
@@ -2057,9 +2061,8 @@
       remembered_set->Dump(LOG(ERROR) << remembered_set->GetName() << ": ");
     }
     DumpSpaces();
-    return false;
   }
-  return true;
+  return visitor.GetFailureCount();
 }
 
 class VerifyReferenceCardVisitor {
@@ -2262,8 +2265,10 @@
   if (verify_pre_gc_heap_) {
     TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (!VerifyHeapReferences()) {
-      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
+    size_t failures = VerifyHeapReferences();
+    if (failures > 0) {
+      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
+          << " failures";
     }
   }
   // Check that all objects which reference things in the live stack are on dirty cards.
@@ -2316,8 +2321,10 @@
     SwapSemiSpaces();
     // Pass in false since concurrent reference processing can mean that the reference referents
     // may point to dead objects at the point which PreSweepingGcVerification is called.
-    if (!VerifyHeapReferences(false)) {
-      LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
+    size_t failures = VerifyHeapReferences(false);
+    if (failures > 0) {
+      LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed with " << failures
+          << " failures";
     }
     SwapSemiSpaces();
     gc->SwapBitmaps();
@@ -2342,8 +2349,10 @@
   if (verify_post_gc_heap_) {
     TimingLogger::ScopedSplit split("PostGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (!VerifyHeapReferences()) {
-      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
+    size_t failures = VerifyHeapReferences();
+    if (failures > 0) {
+      LOG(FATAL) << "Post " << gc->GetName() << " heap verification failed with " << failures
+          << " failures";
     }
   }
 }
@@ -2421,7 +2430,7 @@
 }
 
 void Heap::UpdateMaxNativeFootprint() {
-  size_t native_size = native_bytes_allocated_;
+  size_t native_size = native_bytes_allocated_.LoadRelaxed();
   // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
   size_t target_size = native_size / GetTargetHeapUtilization();
   if (target_size > native_size + max_free_) {
@@ -2693,21 +2702,22 @@
     native_need_to_run_finalization_ = false;
   }
   // Total number of native bytes allocated.
-  native_bytes_allocated_.FetchAndAdd(bytes);
-  if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) {
+  size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes);
+  new_native_bytes_allocated += bytes;
+  if (new_native_bytes_allocated > native_footprint_gc_watermark_) {
     collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial :
         collector::kGcTypeFull;
 
     // The second watermark is higher than the gc watermark. If you hit this it means you are
     // allocating native objects faster than the GC can keep up with.
-    if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
+    if (new_native_bytes_allocated > native_footprint_limit_) {
       if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
         // Just finished a GC, attempt to run finalizers.
         RunFinalization(env);
         CHECK(!env->ExceptionCheck());
       }
       // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-      if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
+      if (new_native_bytes_allocated > native_footprint_limit_) {
         CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
         RunFinalization(env);
         native_need_to_run_finalization_ = false;
@@ -2729,7 +2739,7 @@
 void Heap::RegisterNativeFree(JNIEnv* env, int bytes) {
   int expected_size, new_size;
   do {
-    expected_size = native_bytes_allocated_.Load();
+    expected_size = native_bytes_allocated_.LoadRelaxed();
     new_size = expected_size - bytes;
     if (UNLIKELY(new_size < 0)) {
       ScopedObjectAccess soa(env);
@@ -2738,7 +2748,7 @@
                                  "registered as allocated", bytes, expected_size).c_str());
       break;
     }
-  } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size));
+  } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size, new_size));
 }
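
RegisterNativeFree above is the canonical weak-CAS decrement loop: load, compute the candidate, bail out rather than go negative, and retry if another thread won the race. The same loop in std::atomic form (error reporting elided):

    #include <atomic>
    #include <cstdint>

    // Returns false instead of driving the counter negative.
    bool TryRegisterNativeFree(std::atomic<int32_t>& allocated, int32_t bytes) {
      int32_t expected = allocated.load(std::memory_order_relaxed);
      int32_t desired;
      do {
        desired = expected - bytes;
        if (desired < 0) {
          return false;  // caller raises "freed more than registered"
        }
        // compare_exchange_weak refreshes 'expected' on failure.
      } while (!allocated.compare_exchange_weak(expected, desired,
                                                std::memory_order_relaxed));
      return true;
    }
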
 
 size_t Heap::GetTotalMemory() const {
@@ -2773,9 +2783,9 @@
   CHECK(remembered_set != nullptr);
   space::Space* space = remembered_set->GetSpace();
   CHECK(space != nullptr);
-  CHECK(remembered_sets_.find(space) == remembered_sets_.end());
+  CHECK(remembered_sets_.find(space) == remembered_sets_.end()) << space;
   remembered_sets_.Put(space, remembered_set);
-  CHECK(remembered_sets_.find(space) != remembered_sets_.end());
+  CHECK(remembered_sets_.find(space) != remembered_sets_.end()) << space;
 }
 
 void Heap::RemoveRememberedSet(space::Space* space) {
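
Two of the heap.cc changes above deserve a gloss: FetchAndAddSequentiallyConsistent returns the value before the addition (hence bytes is added back to form new_native_bytes_allocated), and CompareExchangeWeakRelaxed is a weak CAS that may fail spuriously and must be retried in a loop. A minimal sketch of both patterns with std::atomic; the mapping from ART's Atomic<T> methods onto these std memory orders is an assumption here, not quoted from the source:

#include <atomic>
#include <cstddef>

std::atomic<size_t> native_bytes_allocated{0};

// fetch_add returns the PRE-add value, so `bytes` is added back to get
// the post-add total compared against the watermark; this mirrors
// new_native_bytes_allocated in RegisterNativeAllocation.
bool NativeAllocationNeedsGc(size_t bytes, size_t gc_watermark) {
  size_t old_total =
      native_bytes_allocated.fetch_add(bytes, std::memory_order_seq_cst);
  return old_total + bytes > gc_watermark;
}

// Mirrors RegisterNativeFree's loop: compare_exchange_weak reloads
// `expected` on failure, so the loop simply retries; relaxed ordering
// is enough for a plain counter.
bool NativeFree(size_t bytes) {
  size_t expected = native_bytes_allocated.load(std::memory_order_relaxed);
  do {
    if (bytes > expected) {
      return false;  // freeing more than was registered; caller throws
    }
  } while (!native_bytes_allocated.compare_exchange_weak(
      expected, expected - bytes, std::memory_order_relaxed));
  return true;
}
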
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 3b071d1..887b17e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -109,8 +109,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
 
-std::ostream& operator<<(std::ostream& os, const RootType& root_type);
-
 class Heap {
  public:
   // If true, measure the total allocation time.
@@ -144,7 +142,7 @@
                 size_t max_free, double target_utilization,
                 double foreground_heap_growth_multiplier, size_t capacity,
                 const std::string& original_image_file_name,
-                const InstructionSet image_instruction_set,
+                InstructionSet image_instruction_set,
                 CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
@@ -218,7 +216,8 @@
 
   // Check sanity of all live references.
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  bool VerifyHeapReferences(bool verify_referents = true)
+  // Returns how many failures occurred.
+  size_t VerifyHeapReferences(bool verify_referents = true)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
   bool VerifyMissingCardMarks()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -373,7 +372,7 @@
 
   // Returns the number of bytes currently allocated.
   size_t GetBytesAllocated() const {
-    return num_bytes_allocated_;
+    return num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
   // Returns the number of objects currently allocated.
@@ -409,7 +408,7 @@
 
   // Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    return GetTotalMemory() - num_bytes_allocated_;
+    return GetTotalMemory() - num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
   // Get the space that corresponds to an object's address. The current implementation searches all
@@ -733,7 +732,7 @@
   space::LargeObjectSpace* large_object_space_;
 
   // The card table, dirtied by the write barrier.
-  UniquePtr<accounting::CardTable> card_table_;
+  std::unique_ptr<accounting::CardTable> card_table_;
 
   // A mod-union table remembers all of the references from its space to other spaces.
   SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
@@ -779,6 +778,9 @@
   // useful for benchmarking since it reduces time spent in GC to a low %.
   const bool ignore_max_footprint_;
 
+  // Lock which guards zygote space creation.
+  Mutex zygote_creation_lock_;
+
   // If we have a zygote space.
   bool have_zygote_space_;
 
@@ -788,7 +790,7 @@
   // Guards access to the state of GC; the associated condition variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
+  std::unique_ptr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
 
   // Reference processor.
   ReferenceProcessor reference_processor_;
@@ -880,7 +882,7 @@
   };
 
   // Parallel GC data structures.
-  UniquePtr<ThreadPool> thread_pool_;
+  std::unique_ptr<ThreadPool> thread_pool_;
 
   // The nanosecond time at which the last GC ended.
   uint64_t last_gc_time_ns_;
@@ -893,19 +895,19 @@
   uint64_t allocation_rate_;
 
   // For a GC cycle, bitmaps that are set corresponding to the live and marked objects.
-  UniquePtr<accounting::HeapBitmap> live_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
-  UniquePtr<accounting::HeapBitmap> mark_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
+  std::unique_ptr<accounting::HeapBitmap> live_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
+  std::unique_ptr<accounting::HeapBitmap> mark_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
 
   // Mark stack that we reuse to avoid re-allocating the mark stack.
-  UniquePtr<accounting::ObjectStack> mark_stack_;
+  std::unique_ptr<accounting::ObjectStack> mark_stack_;
 
   // Allocation stack, new allocations go here so that we can do sticky mark bits. This enables us
   // to use the live bitmap as the old mark bitmap.
   const size_t max_allocation_stack_size_;
-  UniquePtr<accounting::ObjectStack> allocation_stack_;
+  std::unique_ptr<accounting::ObjectStack> allocation_stack_;
 
   // Second allocation stack so that we can process allocation with the heap unlocked.
-  UniquePtr<accounting::ObjectStack> live_stack_;
+  std::unique_ptr<accounting::ObjectStack> live_stack_;
 
   // Allocator type.
   AllocatorType current_allocator_;
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 8850b92..4176f4a 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -48,7 +48,7 @@
         hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
     for (size_t i = 0; i < 1024; ++i) {
       StackHandleScope<1> hs(soa.Self());
-      Handle<mirror::ObjectArray<mirror::Object> > array(hs.NewHandle(
+      Handle<mirror::ObjectArray<mirror::Object>> array(hs.NewHandle(
           mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c.Get(), 2048)));
       for (size_t j = 0; j < 2048; ++j) {
         mirror::String* string = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!");
@@ -63,7 +63,7 @@
 TEST_F(HeapTest, HeapBitmapCapacityTest) {
   byte* heap_begin = reinterpret_cast<byte*>(0x1000);
   const size_t heap_capacity = kObjectAlignment * (sizeof(intptr_t) * 8 + 1);
-  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   mirror::Object* fake_end_of_heap_object =
       reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity - kObjectAlignment]);
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 497a61f..71c295e 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -48,8 +48,8 @@
   end_ += num_bytes;
   *bytes_allocated = num_bytes;
   // Use the CAS-free versions as an optimization.
-  objects_allocated_ = objects_allocated_ + 1;
-  bytes_allocated_ = bytes_allocated_ + num_bytes;
+  objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
+  bytes_allocated_.StoreRelaxed(bytes_allocated_.LoadRelaxed() + num_bytes);
   if (UNLIKELY(usable_size != nullptr)) {
     *usable_size = num_bytes;
   }
@@ -76,8 +76,8 @@
 inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) {
   mirror::Object* ret = AllocNonvirtualWithoutAccounting(num_bytes);
   if (ret != nullptr) {
-    objects_allocated_.FetchAndAdd(1);
-    bytes_allocated_.FetchAndAdd(num_bytes);
+    objects_allocated_.FetchAndAddSequentiallyConsistent(1);
+    bytes_allocated_.FetchAndAddSequentiallyConsistent(num_bytes);
   }
   return ret;
 }
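
The two allocation paths above intentionally differ in atomicity. The "CAS-free" fast path relies on the caller holding the space exclusively (a thread-local buffer, or the world suspended), so a relaxed load/store pair suffices; the shared AllocNonvirtual path needs a real atomic read-modify-write. A sketch of that distinction with std::atomic, assuming the single-writer invariant the comment implies:

#include <atomic>
#include <cstddef>

std::atomic<size_t> objects_allocated{0};

// Exclusive path: no concurrent writers, so a relaxed load-modify-store
// (not an atomic RMW) is sufficient and cheap.
void BumpCounterExclusive(size_t n) {
  objects_allocated.store(
      objects_allocated.load(std::memory_order_relaxed) + n,
      std::memory_order_relaxed);
}

// Shared path: multiple threads may allocate concurrently, so a true
// atomic read-modify-write is required.
void BumpCounterShared(size_t n) {
  objects_allocated.fetch_add(n, std::memory_order_seq_cst);
}
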
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 90ffe59..fd0a92d 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -28,7 +28,7 @@
                                            byte* requested_begin) {
   capacity = RoundUp(capacity, kPageSize);
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
                                                  PROT_READ | PROT_WRITE, true, &error_msg));
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
@@ -68,8 +68,8 @@
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
   SetEnd(Begin());
-  objects_allocated_ = 0;
-  bytes_allocated_ = 0;
+  objects_allocated_.StoreRelaxed(0);
+  bytes_allocated_.StoreRelaxed(0);
   growth_end_ = Limit();
   {
     MutexLock mu(Thread::Current(), block_lock_);
@@ -204,7 +204,7 @@
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
   // Start out pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(bytes_allocated_.Load());
+  uint64_t total = static_cast<uint64_t>(bytes_allocated_.LoadRelaxed());
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -222,7 +222,7 @@
 
 uint64_t BumpPointerSpace::GetObjectsAllocated() {
   // Start out pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(objects_allocated_.Load());
+  uint64_t total = static_cast<uint64_t>(objects_allocated_.LoadRelaxed());
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -239,8 +239,8 @@
 }
 
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
-  objects_allocated_.FetchAndAdd(thread->GetThreadLocalObjectsAllocated());
-  bytes_allocated_.FetchAndAdd(thread->GetThreadLocalBytesAllocated());
+  objects_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalObjectsAllocated());
+  bytes_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalBytesAllocated());
   thread->SetTlab(nullptr, nullptr);
 }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 446f898..45fee14 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -103,8 +103,11 @@
                                    const InstructionSet image_isa,
                                    std::string* image_filename,
                                    bool *is_system) {
-  if (OS::FileExists(image_location)) {
-    *image_filename = image_location;
+  // image_location = /system/framework/boot.art
+  // system_image_filename = /system/framework/<image_isa>/boot.art
+  std::string system_image_filename(GetSystemImageFilename(image_location, image_isa));
+  if (OS::FileExists(system_image_filename.c_str())) {
+    *image_filename = system_image_filename;
     *is_system = true;
     return true;
   }
@@ -113,6 +116,9 @@
 
   // Always set output location even if it does not exist,
   // so that the caller knows where to create the image.
+  //
+  // image_location = /system/framework/boot.art
+  // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
   *image_filename = GetDalvikCacheFilenameOrDie(image_location, dalvik_cache.c_str());
   *is_system = false;
   return OS::FileExists(image_filename->c_str());
@@ -123,8 +129,8 @@
   std::string image_filename;
   bool is_system = false;
   if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
-    UniquePtr<File> image_file(OS::OpenFileForReading(image_filename.c_str()));
-    UniquePtr<ImageHeader> image_header(new ImageHeader);
+    std::unique_ptr<File> image_file(OS::OpenFileForReading(image_filename.c_str()));
+    std::unique_ptr<ImageHeader> image_header(new ImageHeader);
     const bool success = image_file->ReadFully(image_header.get(), sizeof(ImageHeader));
     if (!success || !image_header->IsValid()) {
       LOG(FATAL) << "Invalid Image header for: " << image_filename;
@@ -194,7 +200,7 @@
     LOG(INFO) << "ImageSpace::Init entering image_filename=" << image_filename;
   }
 
-  UniquePtr<File> file(OS::OpenFileForReading(image_filename));
+  std::unique_ptr<File> file(OS::OpenFileForReading(image_filename));
   if (file.get() == NULL) {
     *error_msg = StringPrintf("Failed to open '%s'", image_filename);
     return nullptr;
@@ -207,7 +213,7 @@
   }
 
   // Note: The image header is part of the image due to the mmap page alignment required of the offset.
-  UniquePtr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
+  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
                                                  image_header.GetImageSize(),
                                                  PROT_READ | PROT_WRITE,
                                                  MAP_PRIVATE,
@@ -223,7 +229,7 @@
   CHECK_EQ(image_header.GetImageBegin(), map->Begin());
   DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
 
-  UniquePtr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr, image_header.GetImageBitmapSize(),
+  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr, image_header.GetImageBitmapSize(),
                                                        PROT_READ, MAP_PRIVATE,
                                                        file->Fd(), image_header.GetBitmapOffset(),
                                                        false,
@@ -233,10 +239,10 @@
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
-  uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
+  uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
                                        bitmap_index));
-  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
                                                           reinterpret_cast<byte*>(map->Begin()),
                                                           map->Size()));
@@ -245,11 +251,14 @@
     return nullptr;
   }
 
-  UniquePtr<ImageSpace> space(new ImageSpace(image_filename, image_location,
+  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename, image_location,
                                              map.release(), bitmap.release()));
-  if (kIsDebugBuild) {
-    space->VerifyImageAllocations();
-  }
+
+  // VerifyImageAllocations() will be called later in Runtime::Init()
+  // because some class roots, such as ArtMethod::java_lang_reflect_ArtMethod_
+  // and ArtField::java_lang_reflect_ArtField_, are used by Object::SizeOf()
+  // (which VerifyImageAllocations() calls) and are not set yet at this point.
 
   space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
   if (space->oat_file_.get() == nullptr) {
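
The lookup order that FindImageFilename now implements, sketched with plain std::string; the helper below is hypothetical (the real code uses GetSystemImageFilename and GetDalvikCacheFilenameOrDie):

#include <string>

// Hypothetical helper mirroring the ISA-qualified layout shown in the
// comments above: <dir>/<isa>/<file>.
std::string IsaQualifiedPath(const std::string& dir, const std::string& isa,
                             const std::string& file) {
  return dir + "/" + isa + "/" + file;
}

// For image_location = /system/framework/boot.art and image_isa = arm64:
//   1. try  /system/framework/arm64/boot.art   -> *is_system = true
//   2. else /data/dalvik-cache/arm64/boot.art  -> *is_system = false
//      (set even if the file is missing, so the caller knows where to
//      create the image)
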
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 622371f..372db3a 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -43,13 +43,13 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image, const InstructionSet image_isa)
+  static ImageSpace* Create(const char* image, InstructionSet image_isa)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Reads the image header from the specified image location for the
   // instruction set image_isa.
   static ImageHeader* ReadImageHeaderOrDie(const char* image_location,
-                                           const InstructionSet image_isa);
+                                           InstructionSet image_isa);
 
   // Releases the OatFile from the ImageSpace so it can be transfer to
   // the caller, presumably the ClassLinker.
@@ -114,7 +114,7 @@
   //
   // Returns true if an image was found, false otherwise.
   static bool FindImageFilename(const char* image_location,
-                                const InstructionSet image_isa,
+                                InstructionSet image_isa,
                                 std::string* location,
                                 bool* is_system);
 
@@ -128,7 +128,7 @@
 
   static Atomic<uint32_t> bitmap_index_;
 
-  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
   ImageSpace(const std::string& name, const char* image_location,
              MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap);
@@ -136,7 +136,7 @@
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
   // the ClassLinker during its initialization.
-  UniquePtr<OatFile> oat_file_;
+  std::unique_ptr<OatFile> oat_file_;
 
   const std::string image_location_;
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 6c851af..e63cc39 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -16,11 +16,12 @@
 
 #include "large_object_space.h"
 
+#include <memory>
+
 #include "gc/accounting/space_bitmap-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
-#include "UniquePtrCompat.h"
 #include "image.h"
 #include "os.h"
 #include "space-inl.h"
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 0daefba..a84b43a 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -133,9 +133,9 @@
   // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<mirror::Object*,
-      accounting::GcAllocator<mirror::Object*> > large_objects_ GUARDED_BY(lock_);
+      accounting::GcAllocator<mirror::Object*>> large_objects_ GUARDED_BY(lock_);
   typedef SafeMap<mirror::Object*, MemMap*, std::less<mirror::Object*>,
-      accounting::GcAllocator<std::pair<mirror::Object*, MemMap*> > > MemMaps;
+      accounting::GcAllocator<std::pair<mirror::Object*, MemMap*>>> MemMaps;
   MemMaps mem_maps_ GUARDED_BY(lock_);
 };
 
@@ -256,11 +256,11 @@
   AllocationHeader* GetAllocationHeader(const mirror::Object* obj);
 
   typedef std::set<AllocationHeader*, AllocationHeader::SortByPrevFree,
-                   accounting::GcAllocator<AllocationHeader*> > FreeBlocks;
+                   accounting::GcAllocator<AllocationHeader*>> FreeBlocks;
 
   // There is no footer for allocations at the end of the space, so we keep track of how much
   // free space there is at the end manually.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   size_t free_end_ GUARDED_BY(lock_);
   FreeBlocks free_blocks_ GUARDED_BY(lock_);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 8a6636d..23c67ff 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -39,7 +39,7 @@
 
     static const size_t num_allocations = 64;
     static const size_t max_allocation_size = 0x100000;
-    std::vector<std::pair<mirror::Object*, size_t> > requests;
+    std::vector<std::pair<mirror::Object*, size_t>> requests;
 
     for (size_t phase = 0; phase < 2; ++phase) {
       while (requests.size() < num_allocations) {
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index ba46dcc..e710409 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -188,7 +188,7 @@
   VLOG(heap) << "Capacity " << PrettySize(capacity);
   // Remap the tail.
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
                                                     PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
   void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 343bc29..8415fa1 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -17,9 +17,9 @@
 #ifndef ART_RUNTIME_GC_SPACE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_SPACE_H_
 
+#include <memory>
 #include <string>
 
-#include "UniquePtrCompat.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
@@ -339,8 +339,8 @@
  protected:
   DiscontinuousSpace(const std::string& name, GcRetentionPolicy gc_retention_policy);
 
-  UniquePtr<accounting::LargeObjectBitmap> live_bitmap_;
-  UniquePtr<accounting::LargeObjectBitmap> mark_bitmap_;
+  std::unique_ptr<accounting::LargeObjectBitmap> live_bitmap_;
+  std::unique_ptr<accounting::LargeObjectBitmap> mark_bitmap_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(DiscontinuousSpace);
@@ -374,7 +374,7 @@
   }
 
   // Underlying storage of the space
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(MemMapSpace);
@@ -419,9 +419,9 @@
   virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
-  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
 
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 407d362..a2d4942 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -20,10 +20,10 @@
 #include "zygote_space.h"
 
 #include <stdint.h>
+#include <memory>
 
 #include "common_runtime_test.h"
 #include "globals.h"
-#include "UniquePtrCompat.h"
 #include "mirror/array-inl.h"
 #include "mirror/object-inl.h"
 
@@ -129,37 +129,37 @@
 void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
   {
     // Init < max == growth
-    UniquePtr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init == max == growth
-    UniquePtr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init > max == growth
-    UniquePtr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
   {
     // Growth == init < max
-    UniquePtr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Growth < init < max
-    UniquePtr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
   {
     // Init < growth < max
-    UniquePtr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init < max < growth
-    UniquePtr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
 }
@@ -398,7 +398,7 @@
 
   // Fill the space with lots of small objects up to the growth limit
   size_t max_objects = (growth_limit / (object_size > 0 ? object_size : 8)) + 1;
-  UniquePtr<mirror::Object*[]> lots_of_objects(new mirror::Object*[max_objects]);
+  std::unique_ptr<mirror::Object*[]> lots_of_objects(new mirror::Object*[max_objects]);
   size_t last_object = 0;  // last object for which allocation succeeded
   size_t amount_allocated = 0;  // amount of space allocated
   Thread* self = Thread::Current();
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index 0466413..fb3a12e 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -115,7 +115,7 @@
     // Need to mark the card since this will update the mod-union table next GC cycle.
     card_table->MarkCard(ptrs[i]);
   }
-  zygote_space->objects_allocated_.FetchAndSub(num_ptrs);
+  zygote_space->objects_allocated_.FetchAndSubSequentiallyConsistent(num_ptrs);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 50fc62b..5d5fe76 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -65,7 +65,7 @@
   }
 
   uint64_t GetObjectsAllocated() {
-    return objects_allocated_;
+    return objects_allocated_.LoadSequentiallyConsistent();
   }
 
   void Clear() OVERRIDE;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index fc8b594..91f1718 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -475,7 +475,7 @@
         }
       }
 
-      UniquePtr<File> file(new File(out_fd, filename_));
+      std::unique_ptr<File> file(new File(out_fd, filename_));
       okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
           file->WriteFully(body_data_ptr_, body_data_size_);
       if (!okay) {
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 833b07a..5015410 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -363,9 +363,9 @@
   IRTSegmentState segment_state_;
 
   // Mem map where we store the indirect refs.
-  UniquePtr<MemMap> table_mem_map_;
+  std::unique_ptr<MemMap> table_mem_map_;
   // Mem map where we store the extended debugging info.
-  UniquePtr<MemMap> slot_mem_map_;
+  std::unique_ptr<MemMap> slot_mem_map_;
   /* bottom of the stack */
   mirror::Object** table_;
   /* bit mask, ORed into all irefs */
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 1cea24b..679c575 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -35,7 +35,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
-const char* GetInstructionSetString(const InstructionSet isa);
+const char* GetInstructionSetString(InstructionSet isa);
 InstructionSet GetInstructionSetFromString(const char* instruction_set);
 
 size_t GetInstructionSetPointerSize(InstructionSet isa);
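
The dropped const qualifiers here (and on the InstructionSet parameters in heap.h and image_space.h) are purely cosmetic: top-level const on a by-value parameter is not part of a function's signature. A two-line demonstration with a hypothetical function:

// Both lines declare the SAME function; the second is a redeclaration,
// not an overload, because top-level const on a value parameter is
// ignored in the signature.
const char* GetName(const int isa);
const char* GetName(int isa);
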
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 77d29dd..2dbcc80 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -252,7 +252,7 @@
   }
 
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   uintptr_t instrumentation_exit_pc = GetQuickInstrumentationExitPc();
   InstallStackVisitor visitor(thread, context.get(), instrumentation_exit_pc);
   visitor.WalkStack(true);
@@ -522,9 +522,9 @@
 void Instrumentation::InstrumentQuickAllocEntryPoints() {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
   //       should be guarded by a lock.
-  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.Load(), 0);
+  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool enable_instrumentation =
-      quick_alloc_entry_points_instrumentation_counter_.FetchAndAdd(1) == 0;
+      quick_alloc_entry_points_instrumentation_counter_.FetchAndAddSequentiallyConsistent(1) == 0;
   if (enable_instrumentation) {
     SetEntrypointsInstrumented(true);
   }
@@ -533,9 +533,9 @@
 void Instrumentation::UninstrumentQuickAllocEntryPoints() {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
   //       should be guarded by a lock.
-  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.Load(), 0);
+  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool disable_instrumentation =
-      quick_alloc_entry_points_instrumentation_counter_.FetchAndSub(1) == 1;
+      quick_alloc_entry_points_instrumentation_counter_.FetchAndSubSequentiallyConsistent(1) == 1;
   if (disable_instrumentation) {
     SetEntrypointsInstrumented(false);
   }
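
The counter above implements refcounted enable/disable: the entrypoints flip only on the 0-to-1 and 1-to-0 edges, which works because fetch-add and fetch-sub return the pre-update value. A compact sketch of the same pattern, with std::atomic standing in for ART's Atomic<int>:

#include <atomic>

std::atomic<int> instrumentation_count{0};

// fetch_add returns the PRE-add value, so "== 0" detects the first user.
void Instrument(void (*set_instrumented)(bool)) {
  if (instrumentation_count.fetch_add(1, std::memory_order_seq_cst) == 0) {
    set_instrumented(true);   // first enabler flips the entrypoints
  }
}

// fetch_sub returns the PRE-sub value, so "== 1" detects the last user.
void Uninstrument(void (*set_instrumented)(bool)) {
  if (instrumentation_count.fetch_sub(1, std::memory_order_seq_cst) == 1) {
    set_instrumented(false);  // last disabler restores them
  }
}
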
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 2a8cc63..817d104 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -16,13 +16,14 @@
 
 #include "intern_table.h"
 
+#include <memory>
+
 #include "gc/space/image_space.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
-#include "UniquePtrCompat.h"
 #include "utf.h"
 
 namespace art {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 7dd06c6..47d5e09 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -105,7 +105,7 @@
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   Table strong_interns_ GUARDED_BY(Locks::intern_table_lock_);
-  std::vector<std::pair<int32_t, mirror::String*> > new_strong_intern_roots_
+  std::vector<std::pair<int32_t, mirror::String*>> new_strong_intern_roots_
       GUARDED_BY(Locks::intern_table_lock_);
   Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_);
 };
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 0c7c8a9..b42af11 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -59,6 +59,15 @@
 using ::art::mirror::String;
 using ::art::mirror::Throwable;
 
+// b/14882674 Work around a stack overflow issue with clang.
+#if defined(__clang__) && defined(__aarch64__)
+#define SOMETIMES_INLINE __attribute__((noinline))
+#define SOMETIMES_INLINE_KEYWORD
+#else
+#define SOMETIMES_INLINE ALWAYS_INLINE
+#define SOMETIMES_INLINE_KEYWORD inline
+#endif
+
 namespace art {
 namespace interpreter {
 
@@ -152,8 +161,8 @@
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static inline bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                              const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
+                                                const Instruction* inst, uint16_t inst_data) {
   const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
   const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
   ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
@@ -211,7 +220,7 @@
 // Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type>
-static inline bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
   Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
@@ -285,8 +294,8 @@
 // Handles iput-XXX and sput-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check, bool transaction_active>
-static inline bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                              const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
+                                                const Instruction* inst, uint16_t inst_data) {
   bool do_assignability_check = do_access_check;
   bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
@@ -363,7 +372,7 @@
 // Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type, bool transaction_active>
-static inline bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
   Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
@@ -560,14 +569,13 @@
                                                              uint32_t dex_pc,
                                                              mirror::Object* this_object,
                                                              const instrumentation::Instrumentation* instrumentation)
-    ALWAYS_INLINE;
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
 
 static inline uint32_t FindNextInstructionFollowingException(Thread* self,
                                                              ShadowFrame& shadow_frame,
                                                              uint32_t dex_pc,
                                                              mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                                                             const instrumentation::Instrumentation* instrumentation) {
   self->VerifyStack();
   ThrowLocation throw_location;
   mirror::Throwable* exception = self->GetException(&throw_location);
@@ -639,7 +647,7 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                    \
   bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
@@ -660,7 +668,7 @@
 
 // Explicitly instantiate all DoFieldGet functions.
 #define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check)                \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                           \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                        \
   bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, ShadowFrame& shadow_frame, \
                                                       const Instruction* inst, uint16_t inst_data)
 
@@ -691,7 +699,7 @@
 
 // Explicitly instantiate all DoFieldPut functions.
 #define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                                                   \
   bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, const ShadowFrame& shadow_frame, \
                                                                            const Instruction* inst, uint16_t inst_data)
 
@@ -724,7 +732,7 @@
 
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
   bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,      \
                                        const Instruction* inst, uint16_t inst_data,  \
                                        JValue* result)
@@ -735,7 +743,7 @@
 
 // Explicitly instantiate all DoIGetQuick functions.
 #define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type)                            \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
   bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst,  \
                                 uint16_t inst_data)
 
@@ -746,7 +754,7 @@
 
 // Explicitly instantiate all DoIPutQuick functions.
 #define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active)        \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                  \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE               \
   bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
                                                      const Instruction* inst,         \
                                                      uint16_t inst_data)
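
The motivation for SOMETIMES_INLINE is frame size: when every handler is force-inlined into the interpreter loop, all of their locals accumulate in one giant stack frame, which is the stack overflow b/14882674 works around under clang on aarch64. A toy illustration of the effect, not ART code:

// With noinline, each handler's 4 KiB buffer lives in its own
// short-lived frame; if both were force-inlined into Dispatch(), its
// single frame would have to hold both buffers at once.
__attribute__((noinline)) static int HandlerA() {
  char buf[4096];
  buf[0] = 1;
  return buf[0];
}

__attribute__((noinline)) static int HandlerB() {
  char buf[4096];
  buf[0] = 2;
  return buf[0];
}

int Dispatch(int op) { return op == 0 ? HandlerA() : HandlerB(); }
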
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index e0f9e5f..9a274f6 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -234,9 +234,9 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(MOVE_EXCEPTION) {
-    Throwable* exception = self->GetException(NULL);
-    self->ClearException();
+    Throwable* exception = self->GetException(nullptr);
     shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+    self->ClearException();
     ADVANCE(1);
   }
   HANDLE_INSTRUCTION_END();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index c1d24f5..68759ad 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -21,7 +21,7 @@
 
 #define HANDLE_PENDING_EXCEPTION()                                                              \
   do {                                                                                          \
-    CHECK(self->IsExceptionPending());                                                          \
+    DCHECK(self->IsExceptionPending());                                                         \
     if (UNLIKELY(self->TestAllFlags())) {                                                       \
       CheckSuspend(self);                                                                       \
     }                                                                                           \
@@ -163,9 +163,9 @@
         break;
       case Instruction::MOVE_EXCEPTION: {
         PREAMBLE();
-        Throwable* exception = self->GetException(NULL);
-        self->ClearException();
+        Throwable* exception = self->GetException(nullptr);
         shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+        self->ClearException();
         inst = inst->Next_1xx();
         break;
       }
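
Both interpreter implementations (goto-table and switch) get the same reorder: the exception is stored into the shadow frame before ClearException() drops the thread's exception root. The point is GC visibility; once cleared, the Throwable would survive only in a raw C++ local that a moving collector cannot see or update. A toy model of the invariant, with all types hypothetical:

// Toy model: a reference must be published to a GC-visible slot before
// the last GC-visible root to it is cleared.
struct Throwable {};

struct ShadowFrame {
  Throwable* vreg = nullptr;
  void SetVRegReference(Throwable* t) { vreg = t; }  // GC-visible slot
};

void MoveException(ShadowFrame& frame, Throwable*& thread_exception_root) {
  Throwable* exception = thread_exception_root;
  frame.SetVRegReference(exception);  // rooted via the frame from here on
  thread_exception_root = nullptr;    // safe: no window with zero roots
}
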
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 00be016..05bfe0d 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -17,7 +17,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-
+#include <memory>
 #include <string>
 
 #include "atomic.h"
@@ -32,7 +32,6 @@
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -107,8 +106,8 @@
                              Dbg::GetMethodName(method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
 
-  UniquePtr<JdwpTag[]> argTypes(arg_count > 0 ? new JdwpTag[arg_count] : NULL);
-  UniquePtr<uint64_t[]> argValues(arg_count > 0 ? new uint64_t[arg_count] : NULL);
+  std::unique_ptr<JdwpTag[]> argTypes(arg_count > 0 ? new JdwpTag[arg_count] : NULL);
+  std::unique_ptr<uint64_t[]> argValues(arg_count > 0 ? new uint64_t[arg_count] : NULL);
   for (int32_t i = 0; i < arg_count; ++i) {
     argTypes[i] = request.ReadTag();
     size_t width = Dbg::GetTagWidth(argTypes[i]);
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 2419ca6..64e9f37 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -235,7 +235,7 @@
 JdwpState* JdwpState::Create(const JdwpOptions* options) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
-  UniquePtr<JdwpState> state(new JdwpState(options));
+  std::unique_ptr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 17a3de4..6f3317d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -19,6 +19,7 @@
 #include <dlfcn.h>
 
 #include <cstdarg>
+#include <memory>
 #include <utility>
 #include <vector>
 
@@ -49,7 +50,6 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utf.h"
-#include "UniquePtrCompat.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -722,7 +722,9 @@
   }
 
   static jint PushLocalFrame(JNIEnv* env, jint capacity) {
-    if (EnsureLocalCapacity(env, capacity, "PushLocalFrame") != JNI_OK) {
+    // TODO: SOA may not be necessary, but it is used here to satisfy the lock annotations.
+    ScopedObjectAccess soa(env);
+    if (EnsureLocalCapacity(soa, capacity, "PushLocalFrame") != JNI_OK) {
       return JNI_ERR;
     }
     static_cast<JNIEnvExt*>(env)->PushFrame(capacity);
@@ -737,7 +739,9 @@
   }
 
   static jint EnsureLocalCapacity(JNIEnv* env, jint desired_capacity) {
-    return EnsureLocalCapacity(env, desired_capacity, "EnsureLocalCapacity");
+    // TODO: SOA may not be necessary, but it is used here to satisfy the lock annotations.
+    ScopedObjectAccess soa(env);
+    return EnsureLocalCapacity(soa, desired_capacity, "EnsureLocalCapacity");
   }
 
   static jobject NewGlobalRef(JNIEnv* env, jobject obj) {
@@ -795,6 +799,7 @@
     if (obj == nullptr) {
       return;
     }
+    ScopedObjectAccess soa(env);
     IndirectReferenceTable& locals = reinterpret_cast<JNIEnvExt*>(env)->locals;
 
     uint32_t cookie = reinterpret_cast<JNIEnvExt*>(env)->local_ref_cookie;
@@ -2457,18 +2462,17 @@
   }
 
  private:
-  static jint EnsureLocalCapacity(JNIEnv* env, jint desired_capacity,
-                                  const char* caller) {
+  static jint EnsureLocalCapacity(ScopedObjectAccess& soa, jint desired_capacity,
+                                  const char* caller) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // TODO: we should try to expand the table if necessary.
     if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsMax)) {
       LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
       return JNI_ERR;
     }
     // TODO: this isn't quite right, since "capacity" includes holes.
-    size_t capacity = static_cast<JNIEnvExt*>(env)->locals.Capacity();
+    const size_t capacity = soa.Env()->locals.Capacity();
     bool okay = (static_cast<jint>(kLocalsMax - capacity) >= desired_capacity);
     if (!okay) {
-      ScopedObjectAccess soa(env);
       soa.Self()->ThrowOutOfMemoryError(caller);
     }
     return okay ? JNI_OK : JNI_ERR;
@@ -2892,13 +2896,14 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int /*capacity*/) {
+void JNIEnvExt::PushFrame(int capacity) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  UNUSED(capacity);  // cpplint gets confused with (int) and thinks it's a cast.
   // TODO: take 'capacity' into account.
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
 }
 
-void JNIEnvExt::PopFrame() {
+void JNIEnvExt::PopFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   locals.SetSegmentState(local_ref_cookie);
   local_ref_cookie = stacked_local_ref_cookies.back();
   stacked_local_ref_cookies.pop_back();
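
The new SHARED_LOCKS_REQUIRED and GUARDED_BY annotations are Clang thread-safety attributes; constructing a ScopedObjectAccess is what convinces the analysis that mutator_lock_ is held in shared mode. A minimal sketch with the underlying attributes spelled out directly (the macro names are ART's; everything else below is illustrative):

// Era-appropriate spellings accepted by Clang's -Wthread-safety analysis.
struct __attribute__((lockable)) Mutex {
  void SharedLock() __attribute__((shared_lock_function())) {}
  void SharedUnlock() __attribute__((unlock_function())) {}
};

Mutex mutator_lock;

// Analysis warns on any access unless mutator_lock is provably held.
int locals_capacity __attribute__((guarded_by(mutator_lock))) = 0;

// Callers must hold mutator_lock in at least shared mode.
int ReadCapacity() __attribute__((shared_locks_required(mutator_lock))) {
  return locals_capacity;
}

int Example() {
  mutator_lock.SharedLock();      // like constructing a ScopedObjectAccess
  int capacity = ReadCapacity();  // OK: the lock is held here
  mutator_lock.SharedUnlock();
  return capacity;
}
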
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 5964947..37195eb 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -170,7 +170,7 @@
   uint32_t local_ref_cookie;
 
   // JNI local references.
-  IndirectReferenceTable locals;
+  IndirectReferenceTable locals GUARDED_BY(Locks::mutator_lock_);
 
   // Stack of cookies corresponding to PushLocalFrame/PopLocalFrame calls.
   // TODO: to avoid leaks (and bugs), we need to clear this vector on entry (or return)
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 83e9b10..3429827 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -539,8 +539,8 @@
   ExpectException(aioobe_); \
   \
   /* Prepare a couple of buffers. */ \
-  UniquePtr<scalar_type[]> src_buf(new scalar_type[size]); \
-  UniquePtr<scalar_type[]> dst_buf(new scalar_type[size]); \
+  std::unique_ptr<scalar_type[]> src_buf(new scalar_type[size]); \
+  std::unique_ptr<scalar_type[]> dst_buf(new scalar_type[size]); \
   for (jsize i = 0; i < size; ++i) { src_buf[i] = scalar_type(i); } \
   for (jsize i = 0; i < size; ++i) { dst_buf[i] = scalar_type(-1); } \
   \
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index d75d5c2..5d157dc 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -253,8 +253,8 @@
 }
 
 TEST(Leb128Test, Speed) {
-  UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
-  UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
   Leb128EncodingVector builder;
   // Push back 1024 chunks of 1024 values measuring encoding speed.
   uint64_t last_time = NanoTime();
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index ffafc85..49e0b54 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -18,8 +18,13 @@
 
 #include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
+// See CreateStartPos below.
+#ifdef __BIONIC__
+#include <sys/auxv.h>
+#endif
+
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
 #include "utils.h"
@@ -47,10 +52,61 @@
 }
 
 #if defined(__LP64__) && !defined(__x86_64__)
-// Where to start with low memory allocation. The first 64KB is protected by SELinux.
+// Handling mem_map in the 32-bit address range for 64-bit architectures that do not support MAP_32BIT.
+
+// The regular start of memory allocations. The first 64KB is protected by SELinux.
 static constexpr uintptr_t LOW_MEM_START = 64 * KB;
 
-uintptr_t MemMap::next_mem_pos_ = LOW_MEM_START;   // first page to check for low-mem extent
+// Generate a random starting position.
+// To avoid interfering with the image position, take the image's address and only place mappings
+// below it. Current formula (sketch):
+//
+// ART_BASE_ADDRESS   = 0001XXXXXXXXXXXXXXX
+// ----------------------------------------
+// mask_ones          = 0000111111111111111
+// & ~(kPageSize - 1) = 1111111111111110000
+// ----------------------------------------
+// mask               = 0000111111111110000
+// & random data      = YYYYYYYYYYYYYYYYYYY
+// ----------------------------------------
+// tmp                = 0000YYYYYYYYYYY0000
+// + LOW_MEM_START    = 0000000000001000000
+// ----------------------------------------
+// start
+//
+// getauxval as an entropy source is exposed in Bionic, but not in glibc before 2.16. When we
+// do not have Bionic, simply start with LOW_MEM_START.
+
+// This function is standalone so that it can be at least partially tested in mem_map_test.cc.
+#ifdef __BIONIC__
+uintptr_t CreateStartPos(uint64_t input) {
+  CHECK_NE(0, ART_BASE_ADDRESS);
+
+  // Start with all bits below highest bit in ART_BASE_ADDRESS.
+  constexpr size_t leading_zeros = CLZ(static_cast<uint32_t>(ART_BASE_ADDRESS));
+  constexpr uintptr_t mask_ones = (1 << (31 - leading_zeros)) - 1;
+
+  // Lowest (usually 12) bits are not used, as aligned by page size.
+  constexpr uintptr_t mask = mask_ones & ~(kPageSize - 1);
+
+  // Mask input data.
+  return (input & mask) + LOW_MEM_START;
+}
+#endif
+
+static uintptr_t GenerateNextMemPos() {
+#ifdef __BIONIC__
+  uint8_t* random_data = reinterpret_cast<uint8_t*>(getauxval(AT_RANDOM));
+  // The lower 8B are taken for the stack guard. Use the upper 8B (with mask).
+  return CreateStartPos(*reinterpret_cast<uintptr_t*>(random_data + 8));
+#else
+  // No auxv on host, see above.
+  return LOW_MEM_START;
+#endif
+}
+
+// Initialize linear scan to random position.
+uintptr_t MemMap::next_mem_pos_ = GenerateNextMemPos();
 #endif
 
 static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_count,
@@ -76,7 +132,7 @@
   uintptr_t expected = reinterpret_cast<uintptr_t>(expected_ptr);
   uintptr_t limit = expected + byte_count;
 
-  UniquePtr<BacktraceMap> map(BacktraceMap::Create(getpid()));
+  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
   if (!map->Build()) {
     *error_msg << StringPrintf("Failed to build process map to determine why mmap returned "
                                "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR, actual, expected);
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 4255d17..1411856 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -28,6 +28,12 @@
 namespace art {
 
 // Used to keep track of mmap segments.
+//
+// On 64-bit systems that do not support MAP_32BIT, the implementation of MemMap will do a linear
+// scan for free pages. For security, the start of this scan should be randomized, which requires
+// a dynamic initializer.
+// For this to work, it is paramount that there are no other static initializers that access
+// MemMap; otherwise, such calls might see uninitialized values.
 class MemMap {
  public:
   // Request an anonymous region of length 'byte_count' and a requested base address.
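
The warning about other static initializers is the classic cross-translation-unit initialization-order problem; a compact illustration, with all names hypothetical:

#include <cstdint>

uintptr_t GenerateNextMemPos();                 // imagine this randomizes
uintptr_t next_mem_pos = GenerateNextMemPos();  // dynamic initializer

// Within one translation unit, initializers run top to bottom, so this
// object sees the randomized value. If EarlyMapper were defined in a
// DIFFERENT translation unit, the order would be unspecified and its
// constructor could observe next_mem_pos before it was initialized;
// exactly the hazard the comment above warns about.
struct EarlyMapper {
  EarlyMapper() { observed = next_mem_pos; }
  uintptr_t observed = 0;
};
EarlyMapper g_mapper;

uintptr_t GenerateNextMemPos() { return 0x60000000; }
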
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index b26f563..c108a5f 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -16,7 +16,8 @@
 
 #include "mem_map.h"
 
-#include "UniquePtrCompat.h"
+#include <memory>
+
 #include "gtest/gtest.h"
 
 namespace art {
@@ -83,11 +84,44 @@
     }
     delete m1;
   }
+
+#if defined(__LP64__) && !defined(__x86_64__)
+  static uintptr_t GetLinearScanPos() {
+    return MemMap::next_mem_pos_;
+  }
+#endif
 };
 
+#if defined(__LP64__) && !defined(__x86_64__)
+
+#ifdef __BIONIC__
+extern uintptr_t CreateStartPos(uint64_t input);
+#endif
+
+TEST_F(MemMapTest, Start) {
+  uintptr_t start = GetLinearScanPos();
+  EXPECT_LE(64 * KB, start);
+  EXPECT_LT(start, static_cast<uintptr_t>(ART_BASE_ADDRESS));
+
+#ifdef __BIONIC__
+  // Test a couple of values. Make sure they are different.
+  uintptr_t last = 0;
+  for (size_t i = 0; i < 100; ++i) {
+    uintptr_t random_start = CreateStartPos(i * kPageSize);
+    EXPECT_NE(last, random_start);
+    last = random_start;
+  }
+
+  // Even on max, should be below ART_BASE_ADDRESS.
+  EXPECT_LT(CreateStartPos(~0), static_cast<uintptr_t>(ART_BASE_ADDRESS));
+#endif
+  // End of test.
+}
+#endif
+
 TEST_F(MemMapTest, MapAnonymousEmpty) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
                                              nullptr,
                                              0,
                                              PROT_READ,
@@ -108,7 +142,7 @@
 #ifdef __LP64__
 TEST_F(MemMapTest, MapAnonymousEmpty32bit) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
                                              nullptr,
                                              kPageSize,
                                              PROT_READ | PROT_WRITE,
@@ -123,7 +157,7 @@
 TEST_F(MemMapTest, MapAnonymousExactAddr) {
   std::string error_msg;
   // Map at an address that should work, which should succeed.
-  UniquePtr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
+  std::unique_ptr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
                                               reinterpret_cast<byte*>(ART_BASE_ADDRESS),
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -133,7 +167,7 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_TRUE(map0->BaseBegin() == reinterpret_cast<void*>(ART_BASE_ADDRESS));
   // Map at an unspecified address, which should succeed.
-  UniquePtr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
+  std::unique_ptr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
                                               nullptr,
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -143,7 +177,7 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_TRUE(map1->BaseBegin() != nullptr);
   // Attempt to map at the same address, which should fail.
-  UniquePtr<MemMap> map2(MemMap::MapAnonymous("MapAnonymous2",
+  std::unique_ptr<MemMap> map2(MemMap::MapAnonymous("MapAnonymous2",
                                               reinterpret_cast<byte*>(map1->BaseBegin()),
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -165,7 +199,7 @@
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
                                              reinterpret_cast<byte*>(0x71000000),
                                              0x21000000,
                                              PROT_READ | PROT_WRITE,
@@ -180,7 +214,7 @@
   std::string error_msg;
   uintptr_t ptr = 0;
   ptr -= kPageSize;  // Now it's close to the top.
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousOverflow",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousOverflow",
                                              reinterpret_cast<byte*>(ptr),
                                              2 * kPageSize,  // brings it over the top.
                                              PROT_READ | PROT_WRITE,
@@ -193,7 +227,7 @@
 #ifdef __LP64__
 TEST_F(MemMapTest, MapAnonymousLow4GBExpectedTooHigh) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBExpectedTooHigh",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBExpectedTooHigh",
                                              reinterpret_cast<byte*>(UINT64_C(0x100000000)),
                                              kPageSize,
                                              PROT_READ | PROT_WRITE,
@@ -205,7 +239,7 @@
 
 TEST_F(MemMapTest, MapAnonymousLow4GBRangeTooHigh) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBRangeTooHigh",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBRangeTooHigh",
                                              reinterpret_cast<byte*>(0xF0000000),
                                              0x20000000,
                                              PROT_READ | PROT_WRITE,
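
Note on the tests above: MapAnonymousOverflow passes an address one page below the top of the address space together with a two-page size, so the requested range wraps around. A minimal sketch of the guard a mapping routine needs for this case (hypothetical helper, not the actual MemMap code):

    #include <cstddef>
    #include <cstdint>

    // uintptr_t arithmetic is modular, so on wraparound the end of the
    // requested range compares smaller than its start.
    static bool RangeWrapsAddressSpace(uintptr_t addr, size_t byte_count) {
      return addr + byte_count < addr;
    }
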
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 86c5c3f..b3b1b71 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -32,7 +32,8 @@
 // TODO: Get global references for these
 Class* ArtField::java_lang_reflect_ArtField_ = NULL;
 
-ArtField* ArtField::FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field) {
+ArtField* ArtField::FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                       jobject jlr_field) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField);
   mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField();
   DCHECK(field != nullptr);
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 029bd5a..30cd180 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -27,14 +27,15 @@
 namespace art {
 
 struct ArtFieldOffsets;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 
 namespace mirror {
 
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
-  static ArtField* FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field)
+  static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                      jobject jlr_field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -121,7 +122,7 @@
   void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangReflectArtField() {
-    DCHECK(java_lang_reflect_ArtField_ != NULL);
+    DCHECK(java_lang_reflect_ArtField_ != nullptr);
     return java_lang_reflect_ArtField_;
   }
 
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index cde977b..39efa58 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -62,17 +62,17 @@
 }
 
 inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() {
-  return GetFieldObject<ObjectArray<String> >(
+  return GetFieldObject<ObjectArray<String>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_));
 }
 
 inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() {
-  return GetFieldObject<ObjectArray<ArtMethod> >(
+  return GetFieldObject<ObjectArray<ArtMethod>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_));
 }
 
 inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() {
-  return GetFieldObject<ObjectArray<Class> >(
+  return GetFieldObject<ObjectArray<Class>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_));
 }
 
@@ -153,6 +153,65 @@
   SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
 }
 
+inline const void* ArtMethod::GetQuickOatEntryPoint() {
+  if (IsPortableCompiled() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
+    return nullptr;
+  }
+  Runtime* runtime = Runtime::Current();
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native methods
+  // (indicating generic JNI), or the quick-to-interpreter-bridge (but not the trampoline)
+  // for non-native methods.
+  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  if (UNLIKELY(entry_point == GetQuickToInterpreterBridge()) ||
+      UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
+    return nullptr;
+  }
+  return entry_point;
+}
+
+inline const void* ArtMethod::GetQuickOatCodePointer() {
+  return EntryPointToCodePointer(GetQuickOatEntryPoint());
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetMappingTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetVmapTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
 inline void ArtMethod::SetOatNativeGcMapOffset(uint32_t gc_map_offset) {
   DCHECK(!Runtime::Current()->IsStarted());
   SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
@@ -196,6 +255,17 @@
   return result;
 }
 
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
+  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
+  return pc - reinterpret_cast<uintptr_t>(code);
+}
+
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc, const void* quick_entry_point) {
+  DCHECK(quick_entry_point != GetQuickToInterpreterBridge());
+  DCHECK(quick_entry_point == Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this));
+  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
+}
+
 template<VerifyObjectFlags kVerifyFlags>
 inline void ArtMethod::SetNativeMethod(const void* native_method) {
   SetFieldPtr<false, true, kVerifyFlags>(
@@ -233,6 +303,12 @@
   }
 
   const void* code_pointer = EntryPointToCodePointer(entry_point);
+  return GetQuickFrameInfo(code_pointer);
+}
+
+inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
   return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
 }
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 495ae2d..af544fd 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -48,7 +48,8 @@
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
-ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method) {
+ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                          jobject jlr_method) {
   mirror::ArtField* f =
       soa.DecodeField(WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
   mirror::ArtMethod* method = f->GetObject(soa.Decode<mirror::Object*>(jlr_method))->AsArtMethod();
@@ -164,23 +165,21 @@
   return result;
 }
 
-uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  return pc - reinterpret_cast<uintptr_t>(code);
-}
-
 uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
   if (IsPortableCompiled()) {
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
+    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
     return DexFile::kDexNoIndex;   // Special no mapping case
   }
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(code);
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   // Assume the caller wants a pc-to-dex mapping so check here first.
   typedef MappingTable::PcToDexIterator It;
   for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
@@ -197,14 +196,16 @@
   }
   if (abort_on_failure) {
       LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
+             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
              << ") in " << PrettyMethod(this);
   }
   return DexFile::kDexNoIndex;
 }
 
 uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) {
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
     return 0;   // Special no mapping/pc == 0 case
@@ -213,16 +214,14 @@
   typedef MappingTable::DexToPcIterator It;
   for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   // Now check pc-to-dex mappings.
   typedef MappingTable::PcToDexIterator It2;
   for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
@@ -378,43 +377,5 @@
   RegisterNative(self, GetJniDlsymLookupStub(), false);
 }
 
-const void* ArtMethod::GetOatCodePointer() {
-  if (IsPortableCompiled() || IsNative() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
-    return nullptr;
-  }
-  Runtime* runtime = Runtime::Current();
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
-  // On failure, instead of nullptr we get the quick-to-interpreter-bridge (but not the trampoline).
-  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
-  if (entry_point == GetQuickToInterpreterBridge()) {
-    return nullptr;
-  }
-  return EntryPointToCodePointer(entry_point);
-}
-
-const uint8_t* ArtMethod::GetMappingTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
-const uint8_t* ArtMethod::GetVmapTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
 }  // namespace mirror
 }  // namespace art
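
The net effect of the ToDexPc()/ToNativePc() changes above is that the quick entry point is resolved once and reused for both the mapping table and the pc arithmetic, instead of calling GetQuickCodeFor() a second time. A caller that already holds the entry point can use the new two-argument overload, roughly ('method' and 'pc' assumed in scope):

    const void* entry_point = method->GetQuickOatEntryPoint();
    if (entry_point != nullptr) {
      // Offset of 'pc' within the method's compiled code, with no second
      // GetQuickCodeFor() lookup; the DCHECKs in the overload verify consistency.
      uintptr_t native_offset = method->NativePcOffset(pc, entry_point);
    }
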
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 3950a98..34fe0bf 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -32,7 +32,7 @@
 union JValue;
 struct MethodClassOffsets;
 class MethodHelper;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
 
@@ -46,7 +46,8 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
-  static ArtMethod* FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method)
+  static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                        jobject jlr_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -296,14 +297,20 @@
     return reinterpret_cast<const void*>(code);
   }
 
+  // Actual entry point pointer to compiled oat code or nullptr.
+  const void* GetQuickOatEntryPoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Actual pointer to compiled oat code or nullptr.
-  const void* GetOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetQuickOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
   const uint8_t* GetMappingTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetMappingTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
   const uint8_t* GetVmapTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetVmapTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_));
@@ -327,9 +334,17 @@
   }
 
   QuickMethodFrameInfo GetQuickFrameInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  QuickMethodFrameInfo GetQuickFrameInfo(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t GetReturnPcOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFrameSizeInBytes() - kPointerSize;
+    return GetReturnPcOffsetInBytes(GetFrameSizeInBytes());
+  }
+
+  size_t GetReturnPcOffsetInBytes(uint32_t frame_size_in_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK_EQ(frame_size_in_bytes, GetFrameSizeInBytes());
+    return frame_size_in_bytes - kPointerSize;
   }
 
   size_t GetHandleScopeOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -370,6 +385,8 @@
   bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uintptr_t NativePcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t NativePcOffset(const uintptr_t pc, const void* quick_entry_point)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a native PC to a dex PC.
   uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
@@ -390,9 +407,8 @@
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
 
-  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetJavaLangReflectArtMethod() {
-    // This does not need a RB because it is a root.
+    DCHECK(java_lang_reflect_ArtMethod_ != nullptr);
     return java_lang_reflect_ArtMethod_;
   }
 
@@ -407,13 +423,13 @@
   HeapReference<Class> declaring_class_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<ArtMethod> > dex_cache_resolved_methods_;
+  HeapReference<ObjectArray<ArtMethod>> dex_cache_resolved_methods_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<Class> > dex_cache_resolved_types_;
+  HeapReference<ObjectArray<Class>> dex_cache_resolved_types_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<String> > dex_cache_strings_;
+  HeapReference<ObjectArray<String>> dex_cache_strings_;
 
   // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
   // compiled code.
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index b2d8288..512a66f 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -64,12 +64,12 @@
 
 inline ObjectArray<ArtMethod>* Class::GetDirectMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
 }
 
 inline void Class::SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod> >(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod>>(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_)));
   DCHECK_NE(0, new_direct_methods->GetLength());
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), new_direct_methods);
@@ -82,7 +82,7 @@
 inline void Class::SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* direct_methods =
-      GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
+      GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
   direct_methods->Set<false>(i, f);
 }
 
@@ -94,7 +94,7 @@
 template<VerifyObjectFlags kVerifyFlags>
 inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
 }
 
 inline void Class::SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods) {
@@ -122,18 +122,18 @@
 inline void Class::SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* virtual_methods =
-      GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
+      GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
   virtual_methods->Set<false>(i, f);
 }
 
 inline ObjectArray<ArtMethod>* Class::GetVTable() {
   DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable) {
@@ -141,7 +141,7 @@
 }
 
 inline ObjectArray<ArtMethod>* Class::GetImTable() {
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, imtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_));
 }
 
 inline void Class::SetImTable(ObjectArray<ArtMethod>* new_imtable) {
@@ -362,18 +362,18 @@
 
 inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields);
 }
 
 inline ObjectArray<ArtField>* Class::GetSFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
+  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
 }
 
 inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)));
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)));
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
 }
 
@@ -389,7 +389,7 @@
 
 inline void Class::SetStaticField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField> >(
+  ObjectArray<ArtField>* sfields = GetFieldObject<ObjectArray<ArtField>>(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
   sfields->Set<false>(i, f);
 }
@@ -405,7 +405,7 @@
 
 inline void Class::SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField> >(
+  ObjectArray<ArtField>* ifields = GetFieldObject<ObjectArray<ArtField>>(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
   ifields->Set<false>(i, f);
 }
@@ -509,17 +509,12 @@
   VisitStaticFieldsReferences<kVisitClass>(this, visitor);
 }
 
-template<ReadBarrierOption kReadBarrierOption>
-inline bool Class::IsArtFieldClass() {
-  Class* java_lang_Class = GetClass<kVerifyNone, kReadBarrierOption>();
-  Class* java_lang_reflect_ArtField =
-      java_lang_Class->GetInstanceField(0)->GetClass<kVerifyNone, kReadBarrierOption>();
-  return this == java_lang_reflect_ArtField;
+inline bool Class::IsArtFieldClass() const {
+  return this == ArtField::GetJavaLangReflectArtField();
 }
 
-template<ReadBarrierOption kReadBarrierOption>
-inline bool Class::IsArtMethodClass() {
-  return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
+inline bool Class::IsArtMethodClass() const {
+  return this == ArtMethod::GetJavaLangReflectArtMethod();
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index a283f60..40c9975 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -371,15 +371,13 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsStringClass() const;
+  bool IsStringClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsArtFieldClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsArtMethodClass();
+  bool IsArtMethodClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
@@ -904,7 +902,7 @@
   HeapReference<DexCache> dex_cache_;
 
   // static, private, and <init> methods
-  HeapReference<ObjectArray<ArtMethod> > direct_methods_;
+  HeapReference<ObjectArray<ArtMethod>> direct_methods_;
 
   // instance fields
   //
@@ -916,7 +914,7 @@
   // All instance fields that refer to objects are guaranteed to be at
   // the beginning of the field list.  num_reference_instance_fields_
   // specifies the number of reference fields.
-  HeapReference<ObjectArray<ArtField> > ifields_;
+  HeapReference<ObjectArray<ArtField>> ifields_;
 
   // The interface table (iftable_) contains pairs of an interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
@@ -932,7 +930,7 @@
   HeapReference<IfTable> iftable_;
 
   // Interface method table (imt), for quick "invoke-interface".
-  HeapReference<ObjectArray<ArtMethod> > imtable_;
+  HeapReference<ObjectArray<ArtMethod>> imtable_;
 
   // Descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
   HeapReference<String> name_;
@@ -947,13 +945,13 @@
   HeapReference<Class> verify_error_class_;
 
   // Virtual methods defined in this class; invoked through vtable.
-  HeapReference<ObjectArray<ArtMethod> > virtual_methods_;
+  HeapReference<ObjectArray<ArtMethod>> virtual_methods_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
   // virtual_methods_ for miranda methods.
-  HeapReference<ObjectArray<ArtMethod> > vtable_;
+  HeapReference<ObjectArray<ArtMethod>> vtable_;
 
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 11a4002..65a5026 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -123,22 +123,22 @@
   }
 
   ObjectArray<String>* GetStrings() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<String> >(StringsOffset());
+    return GetFieldObject<ObjectArray<String>>(StringsOffset());
   }
 
   ObjectArray<Class>* GetResolvedTypes() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<Class> >(
+    return GetFieldObject<ObjectArray<Class>>(
         OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_));
   }
 
   ObjectArray<ArtMethod>* GetResolvedMethods() ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtMethod> >(ResolvedMethodsOffset());
+    return GetFieldObject<ObjectArray<ArtMethod>>(ResolvedMethodsOffset());
   }
 
   ObjectArray<ArtField>* GetResolvedFields() ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ArtField> >(ResolvedFieldsOffset());
+    return GetFieldObject<ObjectArray<ArtField>>(ResolvedFieldsOffset());
   }
 
   const DexFile* GetDexFile() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -153,10 +153,10 @@
  private:
   HeapReference<Object> dex_;
   HeapReference<String> location_;
-  HeapReference<ObjectArray<ArtField> > resolved_fields_;
-  HeapReference<ObjectArray<ArtMethod> > resolved_methods_;
-  HeapReference<ObjectArray<Class> > resolved_types_;
-  HeapReference<ObjectArray<String> > strings_;
+  HeapReference<ObjectArray<ArtField>> resolved_fields_;
+  HeapReference<ObjectArray<ArtMethod>> resolved_methods_;
+  HeapReference<ObjectArray<Class>> resolved_types_;
+  HeapReference<ObjectArray<String>> strings_;
   uint64_t dex_file_;
 
   friend struct art::DexCacheOffsets;  // for verifying offset information
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index a2072a2..62ab2c1 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -217,8 +217,7 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtField() {
-  return GetClass<kVerifyFlags, kReadBarrierOption>()->
-      template IsArtFieldClass<kReadBarrierOption>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtFieldClass();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -229,8 +228,7 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtMethod() {
-  return GetClass<kVerifyFlags, kReadBarrierOption>()->
-      template IsArtMethodClass<kReadBarrierOption>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtMethodClass();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 04905a5..69e5a84 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -139,10 +139,10 @@
   static AtomicInteger seed(987654321 + std::time(nullptr));
   int32_t expected_value, new_value;
   do {
-    expected_value = static_cast<uint32_t>(seed.Load());
+    expected_value = static_cast<uint32_t>(seed.LoadRelaxed());
     new_value = expected_value * 1103515245 + 12345;
   } while ((expected_value & LockWord::kHashMask) == 0 ||
-      !seed.CompareAndSwap(expected_value, new_value));
+      !seed.CompareExchangeWeakRelaxed(expected_value, new_value));
   return expected_value & LockWord::kHashMask;
 }
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 942a271..9b6e901 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -245,7 +245,7 @@
   DCHECK_GE(new_length, 0);
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
-  Handle<ObjectArray<T> > h_this(hs.NewHandle(this));
+  Handle<ObjectArray<T>> h_this(hs.NewHandle(this));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   gc::AllocatorType allocator_type = heap->IsMovableObject(this) ? heap->GetCurrentAllocator() :
       heap->GetCurrentNonMovingAllocator();
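
The StackHandleScope/Handle pair above is what makes the following allocation safe: allocating can trigger a compacting GC that moves 'this', and the GC updates the handle's slot while a raw pointer would go stale. The shape of the pattern, sketched with the ART helpers visible in this hunk:

    // Inside a method of ObjectArray<T>; 'self' is the current Thread*.
    StackHandleScope<1> hs(self);
    Handle<ObjectArray<T>> h_this(hs.NewHandle(this));
    // ... allocate; objects may move ...
    // From here on, use h_this.Get() instead of the raw 'this'.
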
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index e24602a..18e50ce 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -18,6 +18,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <memory>
 
 #include "array-inl.h"
 #include "art_field-inl.h"
@@ -36,7 +37,6 @@
 #include "object_array-inl.h"
 #include "handle_scope-inl.h"
 #include "string-inl.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 namespace mirror {
@@ -48,7 +48,7 @@
                     const char* utf16_expected_le,
                     int32_t expected_hash)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UniquePtr<uint16_t[]> utf16_expected(new uint16_t[expected_utf16_length]);
+    std::unique_ptr<uint16_t[]> utf16_expected(new uint16_t[expected_utf16_length]);
     for (int32_t i = 0; i < expected_utf16_length; i++) {
       uint16_t ch = (((utf16_expected_le[i*2 + 0] & 0xff) << 8) |
                      ((utf16_expected_le[i*2 + 1] & 0xff) << 0));
@@ -116,7 +116,7 @@
 TEST_F(ObjectTest, AllocObjectArray) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
-  Handle<ObjectArray<Object> > oa(
+  Handle<ObjectArray<Object>> oa(
       hs.NewHandle(class_linker_->AllocObjectArray<Object>(soa.Self(), 2)));
   EXPECT_EQ(2, oa->GetLength());
   EXPECT_TRUE(oa->Get(0) == NULL);
diff --git a/runtime/mirror/proxy.h b/runtime/mirror/proxy.h
index 5f9cceb..6e4947e 100644
--- a/runtime/mirror/proxy.h
+++ b/runtime/mirror/proxy.h
@@ -30,18 +30,18 @@
 class MANAGED SynthesizedProxyClass : public Class {
  public:
   ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<Class> >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+    return GetFieldObject<ObjectArray<Class>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
                                                                        interfaces_));
   }
 
-  ObjectArray<ObjectArray<Class> >* GetThrows()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ObjectArray<Class> > >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+  ObjectArray<ObjectArray<Class>>* GetThrows() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ObjectArray<Class>>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
                                                                                      throws_));
   }
 
  private:
-  HeapReference<ObjectArray<Class> > interfaces_;
-  HeapReference<ObjectArray<ObjectArray<Class> > > throws_;
+  HeapReference<ObjectArray<Class>> interfaces_;
+  HeapReference<ObjectArray<ObjectArray<Class>>> throws_;
   DISALLOW_IMPLICIT_CONSTRUCTORS(SynthesizedProxyClass);
 };
 
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 53e4a6f..f783edb 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -99,12 +99,12 @@
 
 int32_t Monitor::GetHashCode() {
   while (!HasHashCode()) {
-    if (hash_code_.CompareAndSwap(0, mirror::Object::GenerateIdentityHashCode())) {
+    if (hash_code_.CompareExchangeWeakRelaxed(0, mirror::Object::GenerateIdentityHashCode())) {
       break;
     }
   }
   DCHECK(HasHashCode());
-  return hash_code_.Load();
+  return hash_code_.LoadRelaxed();
 }
 
 bool Monitor::Install(Thread* self) {
@@ -119,7 +119,7 @@
       break;
     }
     case LockWord::kHashCode: {
-      CHECK_EQ(hash_code_, static_cast<int32_t>(lw.GetHashCode()));
+      CHECK_EQ(hash_code_.LoadRelaxed(), static_cast<int32_t>(lw.GetHashCode()));
       break;
     }
     case LockWord::kFatLocked: {
@@ -623,7 +623,7 @@
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
   // Allocate and acquire a new monitor.
-  UniquePtr<Monitor> m(new Monitor(self, owner, obj, hash_code));
+  std::unique_ptr<Monitor> m(new Monitor(self, owner, obj, hash_code));
   if (m->Install(self)) {
     if (owner != nullptr) {
       VLOG(monitor) << "monitor: thread" << owner->GetThreadId()
diff --git a/runtime/monitor.h b/runtime/monitor.h
index bc5d2e4..bc1b2ed4 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -107,7 +107,7 @@
   bool IsLocked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasHashCode() const {
-    return hash_code_.Load() != 0;
+    return hash_code_.LoadRelaxed() != 0;
   }
 
   MonitorId GetMonitorId() const {
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 5353592..8f74dd7 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -115,7 +115,9 @@
   if (outputName.c_str() == nullptr) {
     // FindOrCreateOatFileForDexLocation can tolerate a missing dex_location_checksum
     dex_file = linker->FindDexFileInOatFileFromDexLocation(sourceName.c_str(),
-                                                           dex_location_checksum_pointer, &error_msgs);
+                                                           dex_location_checksum_pointer,
+                                                           kRuntimeISA,
+                                                           &error_msgs);
   } else {
     // FindOrCreateOatFileForDexLocation requires the dex_location_checksum
     if (dex_location_checksum_pointer == NULL) {
@@ -198,7 +200,7 @@
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
   jobjectArray result = nullptr;
   const DexFile* dex_file = toDexFile(cookie, env);
-  if (dex_file == nullptr) {
+  if (dex_file != nullptr) {
     result = env->NewObjectArray(dex_file->NumClassDefs(), WellKnownClasses::java_lang_String,
                                  nullptr);
     if (result != nullptr) {
@@ -297,6 +299,52 @@
     }
   }
 
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
+
+  // Check if we have an odex file next to the dex file.
+  std::string odex_filename(DexFilenameToOdexFilename(filename, kRuntimeISA));
+  std::string error_msg;
+  std::unique_ptr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
+                                                        &error_msg));
+  if (oat_file.get() == nullptr) {
+    if (kVerboseLogging) {
+      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
+          << "': " << error_msg;
+    }
+    error_msg.clear();
+  } else {
+    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
+                                                                           kReasonLogging);
+    if (oat_dex_file != nullptr) {
+      uint32_t location_checksum;
+      // If it's not possible to read the classes.dex, assume up-to-date, as we won't be able to
+      // compile it anyway.
+      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
+              << filename << ": " << error_msg;
+        }
+        return JNI_FALSE;
+      }
+      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
+                                              target_instruction_set,
+                                              &error_msg)) {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
+              << " has an up-to-date checksum compared to " << filename;
+        }
+        return JNI_FALSE;
+      } else {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
+              << " with an out-of-date checksum compared to " << filename
+              << ": " << error_msg;
+        }
+        error_msg.clear();
+      }
+    }
+  }
+
   // Check the profile file.  We need to rerun dex2oat if the profile has changed significantly
   // since the last time, or it's new.
   // If the 'defer' argument is true then this will be retried later.  In this case we
@@ -384,52 +432,6 @@
     }
   }
 
-  const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
-
-  // Check if we have an odex file next to the dex file.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename));
-  std::string error_msg;
-  UniquePtr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
-                                                  &error_msg));
-  if (oat_file.get() == nullptr) {
-    if (kVerboseLogging) {
-      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
-          << "': " << error_msg;
-    }
-    error_msg.clear();
-  } else {
-    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
-                                                                           kReasonLogging);
-    if (oat_dex_file != nullptr) {
-      uint32_t location_checksum;
-      // If its not possible to read the classes.dex assume up-to-date as we won't be able to
-      // compile it anyway.
-      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
-              << filename << ": " << error_msg;
-        }
-        return JNI_FALSE;
-      }
-      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
-                                              target_instruction_set,
-                                              &error_msg)) {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
-              << " has an up-to-date checksum compared to " << filename;
-        }
-        return JNI_FALSE;
-      } else {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
-              << " with an out-of-date checksum compared to " << filename
-              << ": " << error_msg;
-        }
-        error_msg.clear();
-      }
-    }
-  }
-
   // Check if we have an oat file in the cache
   const std::string cache_dir(GetDalvikCacheOrDie(instruction_set));
   const std::string cache_location(
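
The two large hunks in this file mostly relocate code: the odex-next-to-the-dex check moves ahead of the profile check (picking up the ISA-aware DexFilenameToOdexFilename and std::unique_ptr on the way). The resulting decision order in DexFile_isDexOptNeeded is roughly:

    // 1. Up-to-date odex alongside the dex file?         -> JNI_FALSE (no dexopt needed)
    // 2. Profile changed significantly (and not defer)?  -> JNI_TRUE  (recompile)
    // 3. Up-to-date oat file in the dalvik cache?        -> JNI_FALSE
    // 4. Otherwise                                       -> JNI_TRUE
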
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index cb8e623..f2b8a03 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -73,12 +73,12 @@
   const DexFile* dex_file = path[index];
   const std::string& location(dex_file->GetLocation());
   std::string error_msg;
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(location.c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(location.c_str(), &error_msg));
   if (zip_archive.get() == nullptr) {
     LOG(WARNING) << "Failed to open zip archive '" << location << "': " << error_msg;
     return NULL;
   }
-  UniquePtr<ZipEntry> zip_entry(zip_archive->Find(name.c_str(), &error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(name.c_str(), &error_msg));
   if (zip_entry.get() == NULL) {
     return NULL;
   }
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index 744ac05..822aefa 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -24,12 +24,12 @@
 
 // Variant of ScopedObjectAccess that does no runnable transitions. Should only be used by "fast"
 // JNI methods.
-class ScopedFastNativeObjectAccess : public ScopedObjectAccess {
+class ScopedFastNativeObjectAccess : public ScopedObjectAccessAlreadyRunnable {
  public:
   explicit ScopedFastNativeObjectAccess(JNIEnv* env)
     LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
     SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
-     : ScopedObjectAccess(env) {
+     : ScopedObjectAccessAlreadyRunnable(env) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsFastNative());
     // Don't work with raw objects in non-runnable states.
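
With this change the scoped-access helpers form a small hierarchy in which only the classes that need it perform the runnable transition; fast-native JNI methods arrive already runnable, so their scope derives from the transition-free base. Roughly, as implied by this patch and the reflection.h hunks below (a sketch, not the actual header):

    //   ScopedObjectAccessAlreadyRunnable       // decode/encode helpers, no transition
    //   +-- ScopedObjectAccessUnchecked         // trusts the caller's thread state
    //   |   +-- ScopedObjectAccess              // performs the runnable transition
    //   +-- ScopedFastNativeObjectAccess        // fast JNI: thread is already runnable
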
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 7976f6a..74dfe91 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -33,17 +33,6 @@
 
 namespace art {
 
-std::string OatFile::DexFilenameToOdexFilename(const std::string& location) {
-  CHECK_GE(location.size(), 4U) << location;  // must be at least .123
-  size_t dot_index = location.size() - 3 - 1;  // 3=dex or zip or apk
-  CHECK_EQ('.', location[dot_index]) << location;
-  std::string odex_location(location);
-  odex_location.resize(dot_index + 1);
-  CHECK_EQ('.', odex_location[odex_location.size()-1]) << location << " " << odex_location;
-  odex_location += "odex";
-  return odex_location;
-}
-
 void OatFile::CheckLocation(const std::string& location) {
   CHECK(!location.empty());
 }
@@ -53,7 +42,7 @@
                              std::string* error_msg) {
   CHECK(!oat_contents.empty()) << location;
   CheckLocation(location);
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   oat_file->begin_ = &oat_contents[0];
   oat_file->end_ = &oat_contents[oat_contents.size()];
   return oat_file->Setup(error_msg) ? oat_file.release() : nullptr;
@@ -82,7 +71,7 @@
   //
   // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
   // This won't work for portable runtime execution because it doesn't process relocations.
-  UniquePtr<File> file(OS::OpenFileForReading(filename.c_str()));
+  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
   if (file.get() == NULL) {
     *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
     return NULL;
@@ -99,7 +88,7 @@
                              const std::string& location,
                              byte* requested_base,
                              std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   bool success = oat_file->Dlopen(elf_filename, requested_base, error_msg);
   if (!success) {
     return nullptr;
@@ -113,7 +102,7 @@
                               bool writable,
                               bool executable,
                               std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   bool success = oat_file->ElfFileOpen(file, requested_base, writable, executable, error_msg);
   if (!success) {
     CHECK(!error_msg->empty());
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index e5dc53c..d703731 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -37,10 +37,6 @@
 
 class OatFile {
  public:
-  // Returns an .odex file name next adjacent to the dex location.
-  // For example, for "/foo/bar/baz.jar", return "/foo/bar/baz.odex".
-  static std::string DexFilenameToOdexFilename(const std::string& location);
-
   // Open an oat file. Returns NULL on failure.  Requested base can
   // optionally be used to request where the file should be loaded.
   static OatFile* Open(const std::string& filename,
@@ -266,10 +262,10 @@
   const byte* end_;
 
   // Backing memory map for oat file when opened by ElfWriter during initial compilation.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Backing memory map for oat file during cross compilation.
-  UniquePtr<ElfFile> elf_file_;
+  std::unique_ptr<ElfFile> elf_file_;
 
   // dlopen handle during runtime.
   void* dlopen_handle_;
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index 767c197..dd8ce16 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_OBJECT_CALLBACKS_H_
 #define ART_RUNTIME_OBJECT_CALLBACKS_H_
 
+// For ostream.
+#include <ostream>
 // For uint32_t.
 #include <stdint.h>
 // For size_t.
@@ -46,6 +48,7 @@
   kRootVMInternal,
   kRootJNIMonitor,
 };
+std::ostream& operator<<(std::ostream& os, const RootType& root_type);
 
 // Returns the new address of the object, returns root if it has not moved. tid and root_type are
 // only used by hprof.
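
Only the operator<< declaration is added here; a matching definition is expected in a .cc file. A minimal sketch of what such a definition could look like (cases abbreviated; purely illustrative):

    std::ostream& operator<<(std::ostream& os, const RootType& root_type) {
      switch (root_type) {
        case kRootVMInternal: return os << "VMInternal";
        case kRootJNIMonitor: return os << "JNIMonitor";
        default:              return os << "RootType[" << static_cast<int>(root_type) << "]";
      }
    }
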
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index d9a5813..e4403d7 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -16,14 +16,14 @@
 
 #include "os.h"
 
-#include <cstddef>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <cstddef>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -41,7 +41,7 @@
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != NULL);
-  UniquePtr<File> file(new File);
+  std::unique_ptr<File> file(new File);
   if (!file->Open(name, flags, 0666)) {
     return NULL;
   }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 55b6a27..4330d27 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 ParsedOptions* ParsedOptions::Create(const Runtime::Options& options, bool ignore_unrecognized) {
-  UniquePtr<ParsedOptions> parsed(new ParsedOptions());
+  std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
   if (parsed->Parse(options, ignore_unrecognized)) {
     return parsed.release();
   }
@@ -533,8 +533,8 @@
       Trace::SetDefaultClockSource(kProfilerClockSourceWall);
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
-    } else if (StartsWith(option, "-Xprofile:")) {
-      if (!ParseStringAfterChar(option, ';', &profile_output_filename_)) {
+    } else if (StartsWith(option, "-Xprofile-filename:")) {
+      if (!ParseStringAfterChar(option, ':', &profile_output_filename_)) {
         return false;
       }
       profile_ = true;
@@ -786,7 +786,7 @@
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
-  UsageMessage(stream, "  -Xprofile=filename\n");
+  UsageMessage(stream, "  -Xprofile-filename:filename\n");
   UsageMessage(stream, "  -Xprofile-period:integervalue\n");
   UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
   UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
@@ -883,11 +883,19 @@
   if (!ParseStringAfterChar(option, after_char, &substring)) {
     return false;
   }
-  std::istringstream iss(substring);
+  bool sane_val = true;
   double value;
-  iss >> value;
-  // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
-  const bool sane_val = iss.eof() && (value >= min) && (value <= max);
+  if (false) {
+    // TODO: this doesn't seem to work on the emulator.  b/15114595
+    std::stringstream iss(substring);
+    iss >> value;
+    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
+    sane_val = iss.eof() && (value >= min) && (value <= max);
+  } else {
+    char* end = nullptr;
+    value = strtod(substring.c_str(), &end);
+    sane_val = *end == '\0' && value >= min && value <= max;
+  }
   if (!sane_val) {
     Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
     return false;
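
The live path above validates with strtod's end pointer while the istringstream version stays parked under 'if (false)' pending b/15114595. The same check in isolation; note that strtod leaves 'end' at the start of the string when nothing converts, so this sketch adds an explicit empty-input check that the inline version above does not have:

    #include <cstdlib>
    #include <string>

    // Returns true iff the whole of 's' parses as a double within [min, max].
    bool ParseBoundedDouble(const std::string& s, double min, double max, double* out) {
      char* end = nullptr;
      double value = strtod(s.c_str(), &end);
      if (end == s.c_str() || *end != '\0' || value < min || value > max) {
        return false;  // no conversion, trailing cruft, or out of range
      }
      *out = value;
      return true;
    }
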
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index 39f7638..b58a29c 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -16,7 +16,8 @@
 
 #include "parsed_options.h"
 
-#include "UniquePtrCompat.h"
+#include <memory>
+
 #include "common_runtime_test.h"
 
 namespace art {
@@ -53,7 +54,7 @@
   options.push_back(std::make_pair("vfprintf", test_vfprintf));
   options.push_back(std::make_pair("abort", test_abort));
   options.push_back(std::make_pair("exit", test_exit));
-  UniquePtr<ParsedOptions> parsed(ParsedOptions::Create(options, false));
+  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false));
   ASSERT_TRUE(parsed.get() != NULL);
 
   EXPECT_EQ(lib_core, parsed->boot_class_path_string_);
diff --git a/runtime/profiler.h b/runtime/profiler.h
index bcd7c29..938fdb7 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_PROFILER_H_
 #define ART_RUNTIME_PROFILER_H_
 
+#include <memory>
 #include <ostream>
 #include <set>
 #include <string>
@@ -29,7 +30,6 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -177,7 +177,7 @@
 
   ProfileSampleResults profile_table_;
 
-  UniquePtr<Barrier> profiler_barrier_;
+  std::unique_ptr<Barrier> profiler_barrier_;
 
   // Set of methods to be filtered out.  This will probably be rare because
   // most of the methods we want to be filtered reside in the boot path and
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index cbd66a6..c08cc30 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -100,7 +100,8 @@
     AppendWide(jv.j);
   }
 
-  void BuildArgArrayFromVarArgs(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
+  void BuildArgArrayFromVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -135,8 +136,8 @@
     }
   }
 
-  void BuildArgArrayFromJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
-                                jvalue* args)
+  void BuildArgArrayFromJValues(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, jvalue* args)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -217,7 +218,8 @@
                      PrettyDescriptor(found_descriptor.as_string()).c_str()).c_str());
   }
 
-  bool BuildArgArrayFromObjectArray(const ScopedObjectAccess& soa, mirror::Object* receiver,
+  bool BuildArgArrayFromObjectArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                                    mirror::Object* receiver,
                                     mirror::ObjectArray<mirror::Object>* args, MethodHelper& mh)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile::TypeList* classes = mh.GetParameterTypeList();
@@ -342,7 +344,7 @@
   uint32_t num_bytes_;
   uint32_t* arg_array_;
   uint32_t small_arg_array_[kSmallArgArraySize];
-  UniquePtr<uint32_t[]> large_arg_array_;
+  std::unique_ptr<uint32_t[]> large_arg_array_;
 };
 
 static void CheckMethodArguments(mirror::ArtMethod* m, uint32_t* args)
@@ -396,8 +398,9 @@
 }
 
 
-static void InvokeWithArgArray(const ScopedObjectAccessUnchecked& soa, mirror::ArtMethod* method,
-                               ArgArray* arg_array, JValue* result, const char* shorty)
+static void InvokeWithArgArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                               mirror::ArtMethod* method, ArgArray* arg_array, JValue* result,
+                               const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
@@ -406,7 +409,8 @@
   method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
@@ -418,7 +422,7 @@
   return result;
 }
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   MethodHelper mh(method);
@@ -429,7 +433,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
@@ -440,7 +444,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args) {
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
@@ -460,7 +464,7 @@
                                     mh.GetShorty());
 }
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, bool accessible) {
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index d9a7228..2c54c06 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -29,8 +29,7 @@
 }  // namespace mirror
 union JValue;
 class MethodHelper;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class ThrowLocation;
 
@@ -48,18 +47,19 @@
                            const JValue& src, JValue* dst)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -67,7 +67,7 @@
                            MethodHelper& mh, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject method, jobject receiver,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
                      jobject args, bool accessible)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
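The reflection entry points above now take the new common base class by const reference. Because both ScopedObjectAccess and ScopedObjectAccessUnchecked derive from ScopedObjectAccessAlreadyRunnable (see the scoped_thread_state_change.h hunk further down), every existing call site keeps compiling unchanged. A minimal, self-contained sketch of that widening; the class names mirror ART's hierarchy, everything else is an illustrative stub:

#include <iostream>

class ScopedObjectAccessAlreadyRunnable {};  // new common base
class ScopedObjectAccessUnchecked : public ScopedObjectAccessAlreadyRunnable {};
class ScopedObjectAccess : public ScopedObjectAccessUnchecked {};

// One signature now serves callers holding either scoped-access type.
void InvokeLikeEntryPoint(const ScopedObjectAccessAlreadyRunnable& soa) {
  (void)soa;
  std::cout << "dispatched\n";
}

int main() {
  ScopedObjectAccess checked;
  ScopedObjectAccessUnchecked unchecked;
  InvokeLikeEntryPoint(checked);    // derived-to-base binds implicitly
  InvokeLikeEntryPoint(unchecked);
  return 0;
}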
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d183cba..dcbf42d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -27,6 +27,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
+#include <memory>
 #include <vector>
 #include <fcntl.h>
 
@@ -72,7 +73,6 @@
 #include "trace.h"
 #include "transaction.h"
 #include "profiler.h"
-#include "UniquePtrCompat.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
 
@@ -395,7 +395,10 @@
 
   system_class_loader_ = CreateSystemClassLoader();
 
-  self->GetJniEnv()->locals.AssertEmpty();
+  {
+    ScopedObjectAccess soa(self);
+    self->GetJniEnv()->locals.AssertEmpty();
+  }
 
   VLOG(startup) << "Runtime::Start exiting";
 
@@ -502,7 +505,7 @@
 bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
-  UniquePtr<ParsedOptions> options(ParsedOptions::Create(raw_options, ignore_unrecognized));
+  std::unique_ptr<ParsedOptions> options(ParsedOptions::Create(raw_options, ignore_unrecognized));
   if (options.get() == NULL) {
     LOG(ERROR) << "Failed to parse options";
     return false;
@@ -619,6 +622,9 @@
   class_linker_ = new ClassLinker(intern_table_);
   if (GetHeap()->HasImageSpace()) {
     class_linker_->InitFromImage();
+    if (kIsDebugBuild) {
+      GetHeap()->GetImageSpace()->VerifyImageAllocations();
+    }
   } else {
     CHECK(options->boot_class_path_ != NULL);
     CHECK_NE(options->boot_class_path_->size(), 0U);
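runtime.cc also carries the first instance of a migration repeated throughout this patch: the legacy "UniquePtrCompat.h" shim is dropped in favour of <memory> and std::unique_ptr. A tiny sketch of the pattern; ParsedOptionsLike is a hypothetical stand-in for the real ParsedOptions:

#include <memory>  // replaces "UniquePtrCompat.h"

struct ParsedOptionsLike {
  static ParsedOptionsLike* Create() { return new ParsedOptionsLike; }
};

int main() {
  // Was: UniquePtr<ParsedOptionsLike> options(ParsedOptionsLike::Create());
  std::unique_ptr<ParsedOptionsLike> options(ParsedOptionsLike::Create());
  // get(), reset() and release() behave as before, so only the type
  // spelling changes at call sites.
  return options.get() != nullptr ? 0 : 1;
}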
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 07b47c3..f7074f6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -82,7 +82,7 @@
 
 class Runtime {
  public:
-  typedef std::vector<std::pair<std::string, const void*> > Options;
+  typedef std::vector<std::pair<std::string, const void*>> Options;
 
   // Creates and initializes a new runtime.
   static bool Create(const Options& options, bool ignore_unrecognized)
@@ -523,7 +523,7 @@
   size_t threads_being_born_ GUARDED_BY(Locks::runtime_shutdown_lock_);
 
   // Waited upon until no threads are being born.
-  UniquePtr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+  std::unique_ptr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
 
   // Set when runtime shutdown is past the point that new threads may attach.
   bool shutting_down_ GUARDED_BY(Locks::runtime_shutdown_lock_);
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 393bf92..190db60 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -27,7 +27,7 @@
 // Equivalent to std::map, but without operator[] and its bug-prone semantics (in particular,
 // the implicit insertion of a default-constructed value on failed lookups).
 template <typename K, typename V, typename Comparator = std::less<K>,
-          typename Allocator = std::allocator<std::pair<const K, V> > >
+          typename Allocator = std::allocator<std::pair<const K, V>>>
 class SafeMap {
  private:
   typedef SafeMap<K, V, Comparator, Allocator> Self;
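The "> >" to ">>" changes here and in runtime.h above are purely syntactic: C++98 lexed consecutive right angle brackets inside a template argument list as a right-shift token, so the space was mandatory, while C++11 closes nested template lists with ">>" directly. A sketch that compiles under -std=c++11 or later:

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

int main() {
  // C++98 required: std::vector<std::pair<std::string, const void*> >
  std::vector<std::pair<std::string, const void*>> options;
  std::map<int, std::vector<std::unique_ptr<int>>> nested;  // arbitrarily deep
  (void)options;
  (void)nested;
  return 0;
}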
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index dbd961f..d56495e 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -93,50 +93,15 @@
   ThreadState old_thread_state_;
   const bool expected_has_no_thread_;
 
+  friend class ScopedObjectAccessUnchecked;
   DISALLOW_COPY_AND_ASSIGN(ScopedThreadStateChange);
 };
 
-// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
-//
-// This class performs the necessary thread state switching to and from Runnable and lets us
-// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
-// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
-// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
-// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
-// is also manipulating the Object.
-//
-// The destructor transitions back to the previous thread state, typically Native. In this state
-// GC and thread suspension may occur.
-//
-// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a shared of
-// the mutator_lock_ will be acquired on construction.
-class ScopedObjectAccessUnchecked : public ScopedThreadStateChange {
+// Assumes we are already runnable.
+class ScopedObjectAccessAlreadyRunnable {
  public:
-  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
-      : ScopedThreadStateChange(ThreadForEnv(env), kRunnable),
-        env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  explicit ScopedObjectAccessUnchecked(Thread* self)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      : ScopedThreadStateChange(self, kRunnable),
-        env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
-        vm_(env_ != NULL ? env_->vm : NULL) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
-  // change into Runnable or acquire a share on the mutator_lock_.
-  explicit ScopedObjectAccessUnchecked(JavaVM* vm)
-      : ScopedThreadStateChange(), env_(NULL), vm_(down_cast<JavaVMExt*>(vm)) {}
-
-  // Here purely to force inlining.
-  ~ScopedObjectAccessUnchecked() ALWAYS_INLINE {
-    Locks::mutator_lock_->AssertSharedHeld(self_);
+  Thread* Self() const {
+    return self_;
   }
 
   JNIEnvExt* Env() const {
@@ -159,13 +124,11 @@
   template<typename T>
   T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     if (obj == NULL) {
       return NULL;
     }
-
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
-
     return Env()->AddLocalReference<T>(obj);
   }
 
@@ -173,14 +136,14 @@
   T Decode(jobject obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     return down_cast<T>(Self()->DecodeJObject(obj));
   }
 
   mirror::ArtField* DecodeField(jfieldID fid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
@@ -188,7 +151,7 @@
   jfieldID EncodeField(mirror::ArtField* field) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
@@ -196,7 +159,7 @@
   mirror::ArtMethod* DecodeMethod(jmethodID mid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<mirror::ArtMethod*>(mid);
   }
@@ -204,16 +167,83 @@
   jmethodID EncodeMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<jmethodID>(method);
   }
 
- private:
+  bool IsRunnable() const {
+    return self_->GetState() == kRunnable;
+  }
+
+ protected:
+  explicit ScopedObjectAccessAlreadyRunnable(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(ThreadForEnv(env)), env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
+  }
+
+  explicit ScopedObjectAccessAlreadyRunnable(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(self), env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
+        vm_(env_ != nullptr ? env_->vm : nullptr) {
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessAlreadyRunnable(JavaVM* vm)
+      : self_(nullptr), env_(nullptr), vm_(down_cast<JavaVMExt*>(vm)) {}
+
+  // Here purely to force inlining.
+  ~ScopedObjectAccessAlreadyRunnable() ALWAYS_INLINE {
+  }
+
+  // Self thread, can be null.
+  Thread* const self_;
   // The full JNIEnv.
   JNIEnvExt* const env_;
   // The full JavaVM.
   JavaVMExt* const vm_;
+};
+
+// Entry/exit processing for transitions from Native to Runnable (i.e. within JNI functions).
+//
+// This class performs the necessary thread state switching to and from Runnable and lets us
+// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
+// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
+// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
+// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
+// is also manipulating the Object.
+//
+// The destructor transitions back to the previous thread state, typically Native. In this state
+// GC and thread suspension may occur.
+//
+// For annotalysis, the subclass ScopedObjectAccess (below) makes it explicit that a shared lock
+// on the mutator_lock_ will be acquired on construction.
+class ScopedObjectAccessUnchecked : public ScopedObjectAccessAlreadyRunnable {
+ public:
+  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(env), tsc_(Self(), kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  explicit ScopedObjectAccessUnchecked(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(self), tsc_(self, kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(vm), tsc_() {}
+
+ private:
+  // The scoped thread state change makes sure that we are runnable and restores the thread state
+  // in the destructor.
+  const ScopedThreadStateChange tsc_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedObjectAccessUnchecked);
 };
@@ -229,7 +259,7 @@
 
   explicit ScopedObjectAccess(Thread* self)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
+      SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
       : ScopedObjectAccessUnchecked(self) {
   }
 
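The net effect of this hunk: the object decode/encode helpers move into a base class that merely assumes the thread is already runnable, while ScopedObjectAccessUnchecked regains the actual Native to Runnable transition by composing a ScopedThreadStateChange member (tsc_). Construction and destruction order is what makes the layering safe, as this stub sketch shows (names mirror ART, bodies are illustrative):

#include <iostream>

struct ScopedThreadStateChange {            // performs the real transition
  ScopedThreadStateChange() { std::cout << "-> kRunnable\n"; }
  ~ScopedThreadStateChange() { std::cout << "<- previous state\n"; }
};

struct ScopedObjectAccessAlreadyRunnable {  // only caches self_/env_/vm_
  ScopedObjectAccessAlreadyRunnable() { std::cout << "cache thread/env\n"; }
};

struct ScopedObjectAccessUnchecked : ScopedObjectAccessAlreadyRunnable {
  ScopedThreadStateChange tsc_;  // member, not base: constructed after the
};                               // base, destroyed before it

int main() {
  ScopedObjectAccessUnchecked soa;  // base caches state, then tsc_ transitions
  std::cout << "decode/encode raw objects while runnable\n";
  return 0;
}  // tsc_ restores the prior thread state before the base is torn down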
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 611c0a8..c13776d 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -106,7 +106,7 @@
     PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
     return;
   }
-  UniquePtr<File> file(new File(fd, stack_trace_file_));
+  std::unique_ptr<File> file(new File(fd, stack_trace_file_));
   if (!file->WriteFully(s.data(), s.size())) {
     PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file_ << "'";
   } else {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index e0189e9..be1fba4 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -139,9 +139,11 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
-    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
@@ -163,9 +165,11 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
-    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
@@ -316,7 +320,7 @@
         }
         size_t frame_size = method->GetFrameSizeInBytes();
         // Compute PC for next stack frame from return PC.
-        size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
+        size_t return_pc_offset = method->GetReturnPcOffsetInBytes(frame_size);
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
         if (UNLIKELY(exit_stubs_installed)) {
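GetVmapTable() and GetQuickFrameInfo() now take the compiled-code pointer explicitly, so GetQuickOatCodePointer() is resolved once per frame and both queries are guaranteed to describe the same code blob. The shape of the pattern, with stand-in types (everything below is illustrative, not ART's real API):

struct VmapTableLike {
  explicit VmapTableLike(const void* code_pointer) { (void)code_pointer; }
};
struct QuickMethodFrameInfoLike { unsigned frame_size_in_bytes = 0; };

struct ArtMethodLike {
  const void* GetQuickOatCodePointer() const { return this; }  // stand-in
  VmapTableLike GetVmapTable(const void* code) const { return VmapTableLike(code); }
  QuickMethodFrameInfoLike GetQuickFrameInfo(const void* code) const {
    (void)code;
    return QuickMethodFrameInfoLike();
  }
};

void VisitFrameLike(const ArtMethodLike& m) {
  const void* code_pointer = m.GetQuickOatCodePointer();  // one lookup ...
  VmapTableLike vmap = m.GetVmapTable(code_pointer);      // ... shared by
  QuickMethodFrameInfoLike info = m.GetQuickFrameInfo(code_pointer);  // both queries
  (void)vmap;
  (void)info;
}

int main() {
  ArtMethodLike m;
  VisitFrameLike(m);
  return 0;
}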
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 488961e..41cfc58 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -171,7 +171,7 @@
   return nullptr;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
@@ -186,7 +186,8 @@
   return result;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, jobject java_thread) {
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
+                                  jobject java_thread) {
   return FromManagedThread(soa, soa.Decode<mirror::Object*>(java_thread));
 }
 
@@ -556,7 +557,7 @@
   DumpStack(os);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessUnchecked& soa) const {
+mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   return (tlsPtr_.opeer != nullptr) ? reinterpret_cast<mirror::String*>(f->GetObject(tlsPtr_.opeer)) : nullptr;
 }
@@ -932,7 +933,7 @@
 }
 
 void Thread::DumpJavaStack(std::ostream& os) const {
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   StackDumpVisitor dumper(os, const_cast<Thread*>(this), context.get(),
                           !tls32_.throwing_OutOfMemoryError);
   dumper.WalkStack();
@@ -1432,7 +1433,7 @@
 };
 
 template<bool kTransactionActive>
-jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const {
+jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const {
   // Compute depth of stack
   CountStackDepthVisitor count_visitor(const_cast<Thread*>(this));
   count_visitor.WalkStack();
@@ -1455,11 +1456,14 @@
   }
   return soa.AddLocalReference<jobjectArray>(trace);
 }
-template jobject Thread::CreateInternalStackTrace<false>(const ScopedObjectAccessUnchecked& soa) const;
-template jobject Thread::CreateInternalStackTrace<true>(const ScopedObjectAccessUnchecked& soa) const;
+template jobject Thread::CreateInternalStackTrace<false>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
+template jobject Thread::CreateInternalStackTrace<true>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
 
-jobjectArray Thread::InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-    jobject internal, jobjectArray output_array, int* stack_depth) {
+jobjectArray Thread::InternalStackTraceToStackTraceElementArray(
+    const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array,
+    int* stack_depth) {
   // Decode the internal stack trace into the depth, method trace and PC trace
   int32_t depth = soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal)->GetLength() - 1;
 
@@ -2020,10 +2024,14 @@
       size_t num_regs = std::min(map.RegWidth() * 8,
                                  static_cast<size_t>(code_item->registers_size_));
       if (num_regs > 0) {
-        const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
+        Runtime* runtime = Runtime::Current();
+        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m);
+        uintptr_t native_pc_offset = m->NativePcOffset(GetCurrentQuickFramePc(), entry_point);
+        const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
         DCHECK(reg_bitmap != nullptr);
-        const VmapTable vmap_table(m->GetVmapTable());
-        QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
+        const void* code_pointer = mirror::ArtMethod::EntryPointToCodePointer(entry_point);
+        const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+        QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
         // For all dex registers in the bitmap
         mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
         DCHECK(cur_quick_frame != nullptr);
@@ -2144,7 +2152,7 @@
 }
 
 void Thread::VerifyStackImpl() {
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetThreadId());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
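In the root-visiting hunk above, the GC-map lookup key changes from GetNativePcOffset() to an offset computed against the entry point that instrumentation reports for the method, so the register bitmap is indexed into the code that is actually installed. A rough sketch of the arithmetic, with hypothetical addresses chosen purely for illustration:

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical values, for illustration only.
  uintptr_t entry_point = 0x70001000;  // what GetQuickCodeFor() would report
  uintptr_t current_pc  = 0x7000104c;  // the saved quick-frame PC
  uintptr_t native_pc_offset = current_pc - entry_point;
  // This offset keys the GC map / vmap table of that specific code blob,
  // which matters once instrumentation can swap entry points under a method.
  std::cout << "offset into quick code: 0x" << std::hex << native_pc_offset << "\n";
  return 0;
}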
diff --git a/runtime/thread.h b/runtime/thread.h
index be7634f..62fa323 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -21,6 +21,7 @@
 #include <deque>
 #include <iosfwd>
 #include <list>
+#include <memory>
 #include <string>
 
 #include "base/macros.h"
@@ -39,7 +40,6 @@
 #include "stack.h"
 #include "thread_state.h"
 #include "throw_location.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -72,8 +72,7 @@
 struct JNIEnvExt;
 class Monitor;
 class Runtime;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 struct SingleStepControl;
 class Thread;
@@ -98,7 +97,7 @@
   // Space to throw a StackOverflowError in.
   // TODO: shrink reserved space, in particular for 64bit.
 #if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
+  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
 #elif defined(__aarch64__)
   // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
   // But this one works rather well.
@@ -140,12 +139,12 @@
 
   static Thread* Current();
 
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts,
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts,
                                    mirror::Object* thread_peer)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts, jobject thread)
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts, jobject thread)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -276,7 +275,7 @@
   }
 
   // Returns the java.lang.Thread's name, or NULL if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessUnchecked& ts) const
+  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
@@ -458,15 +457,16 @@
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[].
   template<bool kTransactionActive>
-  jobject CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const
+  jobject CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Convert an internal stack trace representation (returned by CreateInternalStackTrace) to a
   // StackTraceElement[]. If output_array is NULL, a new array is created, otherwise as many
   // frames as will fit are written into the given array. If stack_depth is non-NULL, it's updated
   // with the number of valid frames in the returned array.
-  static jobjectArray InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-      jobject internal, jobjectArray output_array = nullptr, int* stack_depth = nullptr)
+  static jobjectArray InternalStackTraceToStackTraceElementArray(
+      const ScopedObjectAccessAlreadyRunnable& soa, jobject internal,
+      jobjectArray output_array = nullptr, int* stack_depth = nullptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 31d8d60..8046500 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -754,7 +754,7 @@
   if (VLOG_IS_ON(threads)) {
     std::ostringstream oss;
     self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
-    LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss;
+    LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
   }
 
   // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
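The oss to oss.str() change above is a real fix, not a style tweak: streaming an std::ostringstream object does not stream its buffer. Under C++98 rules the stream converts to void* and its address is logged; under C++11 the conversion is explicit, so the statement fails to compile outright. A sketch:

#include <iostream>
#include <sstream>

int main() {
  std::ostringstream oss;
  oss << "Thread[1,tid=1234,Runnable]";
  std::cout << oss.str() << "\n";  // the buffered text, as intended
  // std::cout << oss;             // C++98: prints a pointer; C++11: ill-formed
  return 0;
}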
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index 23bf294..c816c84 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -53,7 +53,7 @@
 
   ThreadPool* const thread_pool_;
   const std::string name_;
-  UniquePtr<MemMap> stack_;
+  std::unique_ptr<MemMap> stack_;
   pthread_t pthread_;
 
  private:
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index c1a1ad7..292c94f 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -69,7 +69,7 @@
   // Wait for tasks to complete.
   thread_pool.Wait(self, true, false);
   // Make sure that we finished all the work.
-  EXPECT_EQ(num_tasks, count);
+  EXPECT_EQ(num_tasks, count.LoadSequentiallyConsistent());
 }
 
 TEST_F(ThreadPoolTest, StopStart) {
@@ -82,7 +82,7 @@
   }
   usleep(200);
   // Check that no threads started prematurely.
-  EXPECT_EQ(0, count);
+  EXPECT_EQ(0, count.LoadSequentiallyConsistent());
   // Signal the threads to start processing tasks.
   thread_pool.StartWorkers(self);
   usleep(200);
@@ -91,10 +91,11 @@
   thread_pool.AddTask(self, new CountTask(&bad_count));
   usleep(200);
   // Ensure that the task added after the workers were stopped doesn't get run.
-  EXPECT_EQ(0, bad_count);
+  EXPECT_EQ(0, bad_count.LoadSequentiallyConsistent());
   // Allow tasks to finish up and delete themselves.
   thread_pool.StartWorkers(self);
-  while (count.Load() != num_tasks && bad_count.Load() != 1) {
+  while (count.LoadSequentiallyConsistent() != num_tasks &&
+      bad_count.LoadSequentiallyConsistent() != 1) {
     usleep(200);
   }
   thread_pool.StopWorkers(self);
@@ -135,7 +136,7 @@
   thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth));
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
-  EXPECT_EQ((1 << depth) - 1, count);
+  EXPECT_EQ((1 << depth) - 1, count.LoadSequentiallyConsistent());
 }
 
 }  // namespace art
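The test updates reflect art::Atomic dropping its implicit-read conveniences: loads now spell out an ordering via LoadSequentiallyConsistent(). The same idea with std::atomic as a stand-in (art::Atomic is ART's own wrapper; this block only illustrates the ordering choice, not ART's API):

#include <atomic>
#include <cassert>

int main() {
  std::atomic<int> count(0);
  count.fetch_add(1, std::memory_order_seq_cst);          // what each task does
  int observed = count.load(std::memory_order_seq_cst);   // explicit ordering,
  assert(observed == 1);                                  // like LoadSequentiallyConsistent()
  return observed == 1 ? 0 : 1;
}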
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b85eb7e..d53b369 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -119,7 +119,7 @@
 
 Trace* volatile Trace::the_trace_ = NULL;
 pthread_t Trace::sampling_pthread_ = 0U;
-UniquePtr<std::vector<mirror::ArtMethod*> > Trace::temp_stack_trace_;
+std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
 static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) {
   return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask);
@@ -339,7 +339,7 @@
   runtime->GetThreadList()->SuspendAll();
 
   // Open trace file if not going directly to ddms.
-  UniquePtr<File> trace_file;
+  std::unique_ptr<File> trace_file;
   if (!direct_to_ddms) {
     if (trace_fd < 0) {
       trace_file.reset(OS::CreateEmptyFile(trace_filename));
diff --git a/runtime/trace.h b/runtime/trace.h
index ef6c642..08da16f 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_TRACE_H_
 #define ART_RUNTIME_TRACE_H_
 
+#include <memory>
 #include <ostream>
 #include <set>
 #include <string>
@@ -27,7 +28,6 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -140,13 +140,13 @@
   static pthread_t sampling_pthread_;
 
   // Used to remember an unused stack trace to avoid re-allocation during sampling.
-  static UniquePtr<std::vector<mirror::ArtMethod*> > temp_stack_trace_;
+  static std::unique_ptr<std::vector<mirror::ArtMethod*>> temp_stack_trace_;
 
   // File to write trace data out to, NULL if direct to ddms.
-  UniquePtr<File> trace_file_;
+  std::unique_ptr<File> trace_file_;
 
   // Buffer to store trace data.
-  UniquePtr<uint8_t> buf_;
+  std::unique_ptr<uint8_t> buf_;
 
   // Flags enabling extra tracing of things such as alloc counts.
   const int flags_;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ad0175a..f562252 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -22,10 +22,9 @@
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-
 #include <unistd.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
@@ -105,7 +104,7 @@
 }
 
 bool ReadFileToString(const std::string& file_name, std::string* result) {
-  UniquePtr<File> file(new File);
+  std::unique_ptr<File> file(new File);
   if (!file->Open(file_name, O_RDONLY)) {
     return false;
   }
@@ -1045,7 +1044,7 @@
   if (current_method != nullptr) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  UniquePtr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
   if (!backtrace->Unwind(0)) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
     return;
@@ -1204,6 +1203,37 @@
   return StringPrintf("%s/%s", cache_location, cache_file.c_str());
 }
 
+static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
+  // in = /foo/bar/baz
+  // out = /foo/bar/<isa>/baz
+  size_t pos = filename->rfind('/');
+  CHECK_NE(pos, std::string::npos) << *filename << " " << isa;
+  filename->insert(pos, "/", 1);
+  filename->insert(pos + 1, GetInstructionSetString(isa));
+}
+
+std::string GetSystemImageFilename(const char* location, const InstructionSet isa) {
+  // location = /system/framework/boot.art
+  // filename = /system/framework/<isa>/boot.art
+  std::string filename(location);
+  InsertIsaDirectory(isa, &filename);
+  return filename;
+}
+
+std::string DexFilenameToOdexFilename(const std::string& location, const InstructionSet isa) {
+  // location = /foo/bar/baz.jar
+  // odex_location = /foo/bar/<isa>/baz.odex
+  CHECK_GE(location.size(), 4U) << location;  // must at least be ".xyz" (dot + 3-char extension)
+  std::string odex_location(location);
+  InsertIsaDirectory(isa, &odex_location);
+  size_t dot_index = odex_location.size() - 3 - 1;  // 3 = strlen("dex"|"zip"|"apk"), 1 = the '.'
+  CHECK_EQ('.', odex_location[dot_index]) << location;
+  odex_location.resize(dot_index + 1);
+  CHECK_EQ('.', odex_location[odex_location.size()-1]) << location << " " << odex_location;
+  odex_location += "odex";
+  return odex_location;
+}
+
 bool IsZipMagic(uint32_t magic) {
   return (('P' == ((magic >> 0) & 0xff)) &&
           ('K' == ((magic >> 8) & 0xff)));
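To make the index arithmetic in DexFilenameToOdexFilename() visible, here is a compilable restatement of the helpers with the CHECKs reduced to asserts; the logic mirrors the diff, and the -Like suffix marks stand-in names:

#include <cassert>
#include <string>

// Mirrors InsertIsaDirectory(): /foo/bar/baz -> /foo/bar/<isa>/baz.
static void InsertIsaDirectoryLike(const std::string& isa, std::string* filename) {
  size_t pos = filename->rfind('/');
  assert(pos != std::string::npos);
  filename->insert(pos, "/" + isa);
}

int main() {
  std::string odex_location("/foo/bar/baz.jar");
  InsertIsaDirectoryLike("arm", &odex_location);    // "/foo/bar/arm/baz.jar"
  size_t dot_index = odex_location.size() - 3 - 1;  // 3 = strlen("jar"), 1 = '.'
  assert(odex_location[dot_index] == '.');
  odex_location.resize(dot_index + 1);              // "/foo/bar/arm/baz."
  odex_location += "odex";                          // "/foo/bar/arm/baz.odex"
  assert(odex_location == "/foo/bar/arm/baz.odex");
  return 0;
}

This matches the expectation encoded in the DexFilenameToOdexFilename test added to utils_test.cc below.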
diff --git a/runtime/utils.h b/runtime/utils.h
index 14a532e..4a9236a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -25,6 +25,7 @@
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "globals.h"
+#include "instruction_set.h"
 #include "primitive.h"
 
 namespace art {
@@ -402,6 +403,13 @@
 std::string GetDalvikCacheFilenameOrDie(const char* file_location,
                                         const char* cache_location);
 
+// Returns the system location for an image.
+std::string GetSystemImageFilename(const char* location, InstructionSet isa);
+
+// Returns the .odex file name adjacent to the dex location.
+// For example, for "/foo/bar/baz.jar", return "/foo/bar/<isa>/baz.odex".
+std::string DexFilenameToOdexFilename(const std::string& location, InstructionSet isa);
+
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
 bool IsDexMagic(uint32_t magic);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 8a8834d..4a1e477 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -351,6 +351,16 @@
                GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str());
 }
 
+TEST_F(UtilsTest, GetSystemImageFilename) {
+  EXPECT_STREQ("/system/framework/arm/boot.art",
+               GetSystemImageFilename("/system/framework/boot.art", kArm).c_str());
+}
+
+TEST_F(UtilsTest, DexFilenameToOdexFilename) {
+  EXPECT_STREQ("/foo/bar/arm/baz.odex",
+               DexFilenameToOdexFilename("/foo/bar/baz.jar", kArm).c_str());
+}
+
 TEST_F(UtilsTest, ExecSuccess) {
   std::vector<std::string> command;
   if (kIsTargetBuild) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 0a31f63..4863b83 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1433,8 +1433,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  UniquePtr<RegisterLine> branch_line;
-  UniquePtr<RegisterLine> fallthrough_line;
+  std::unique_ptr<RegisterLine> branch_line;
+  std::unique_ptr<RegisterLine> fallthrough_line;
 
   // We need precise constant types only for deoptimization which happens at runtime.
   const bool need_precise_constant = !Runtime::Current()->IsCompiler();
@@ -3794,7 +3794,7 @@
       }
     }
   } else {
-    UniquePtr<RegisterLine> copy(gDebugVerify ?
+    std::unique_ptr<RegisterLine> copy(gDebugVerify ?
                                  RegisterLine::Create(target_line->NumRegs(), this) :
                                  NULL);
     if (gDebugVerify) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 14200f7..495d3c5 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 #define ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 
+#include <memory>
 #include <set>
 #include <vector>
 
@@ -33,7 +34,6 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -126,7 +126,7 @@
   }
 
  private:
-  UniquePtr<RegisterLine*[]> register_lines_;
+  std::unique_ptr<RegisterLine*[]> register_lines_;
   size_t size_;
 };
 
@@ -617,14 +617,14 @@
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  UniquePtr<RegisterLine> work_line_;
+  std::unique_ptr<RegisterLine> work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  UniquePtr<RegisterLine> saved_line_;
+  std::unique_ptr<RegisterLine> saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
@@ -640,7 +640,7 @@
   const DexFile::CodeItem* const code_item_;  // The code item containing the code for the method.
   const RegType* declaring_class_;  // Lazily computed reg type of the method's declaring class.
   // Instruction widths and flags, one entry per code unit.
-  UniquePtr<InstructionFlags[]> insn_flags_;
+  std::unique_ptr<InstructionFlags[]> insn_flags_;
   // The dex PC of a FindLocksAtDexPc request, -1 otherwise.
   uint32_t interesting_dex_pc_;
   // The container into which FindLocksAtDexPc should write the registers containing held locks,
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 2bcf3e0..9ac04d7 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -17,8 +17,8 @@
 #include "method_verifier.h"
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index f9f3e31..dade203 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -17,12 +17,12 @@
 #ifndef ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 #define ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 
+#include <memory>
 #include <vector>
 
 #include "dex_instruction.h"
 #include "reg_type.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 13b4219..841c01a 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -16,17 +16,15 @@
 
 #include "zip_archive.h"
 
-#include <vector>
-
 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <vector>
 
 #include "base/stringprintf.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
@@ -56,7 +54,7 @@
   std::string name(entry_filename);
   name += " extracted in memory from ";
   name += entry_filename;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
                                              NULL, GetUncompressedLength(),
                                              PROT_READ | PROT_WRITE, false, error_msg));
   if (map.get() == nullptr) {
@@ -123,7 +121,7 @@
   DCHECK(name != nullptr);
 
   // Resist the urge to delete the space. <: is a digraph sequence.
-  UniquePtr< ::ZipEntry> zip_entry(new ::ZipEntry);
+  std::unique_ptr< ::ZipEntry> zip_entry(new ::ZipEntry);
   const int32_t error = FindEntry(handle_, name, zip_entry.get());
   if (error) {
     *error_msg = std::string(ErrorCodeString(error));
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index edaa88b..c0e2f2f 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -18,8 +18,9 @@
 #define ART_RUNTIME_ZIP_ARCHIVE_H_
 
 #include <stdint.h>
-#include <string>
 #include <ziparchive/zip_archive.h>
+#include <memory>
+#include <string>
 
 #include "base/logging.h"
 #include "base/unix_file/random_access_file.h"
@@ -27,7 +28,6 @@
 #include "mem_map.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index d0624cf..d303d1e 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -20,8 +20,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <zlib.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "common_runtime_test.h"
 #include "os.h"
 
@@ -31,16 +31,16 @@
 
 TEST_F(ZipArchiveTest, FindAndExtract) {
   std::string error_msg;
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
   ASSERT_TRUE(zip_archive.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  UniquePtr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
   ASSERT_TRUE(zip_entry.get() != nullptr);
   ASSERT_TRUE(error_msg.empty());
 
   ScratchFile tmp;
   ASSERT_NE(-1, tmp.GetFd());
-  UniquePtr<File> file(new File(tmp.GetFd(), tmp.GetFilename()));
+  std::unique_ptr<File> file(new File(tmp.GetFd(), tmp.GetFilename()));
   ASSERT_TRUE(file.get() != NULL);
   bool success = zip_entry->ExtractToFile(*file, &error_msg);
   ASSERT_TRUE(success) << error_msg;
diff --git a/test/Android.mk b/test/Android.mk
index 9b79abf..8caa033 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -73,7 +73,7 @@
     LOCAL_JAVA_LIBRARIES := $(TARGET_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_MODULE_PATH := $(3)
-    LOCAL_DEX_PREOPT_IMAGE := $(TARGET_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
@@ -81,14 +81,6 @@
 
     ART_TEST_TARGET_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
     ART_TEST_TARGET_DEX_FILES$(ART_PHONY_TEST_TARGET_SUFFIX) += $$(LOCAL_INSTALLED_MODULE)
-
-    ifdef TARGET_2ND_ARCH
-	    ART_TEST_TARGET_DEX_FILES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(4)/$(1)-$(2).jar
-
-      # TODO: make this a simple copy
-$(4)/$(1)-$(2).jar: $(3)/$(1)-$(2).jar $(4)
-	cp $$< $(4)/
-    endif
   endif
 
   ifeq ($(ART_BUILD_HOST),true)
@@ -97,7 +89,7 @@
     LOCAL_SRC_FILES := $(call all-java-files-under, $(2))
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
-    LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
@@ -105,25 +97,13 @@
     ART_TEST_HOST_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 endef
-$(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT),$(2ND_ART_NATIVETEST_OUT))))
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call build-art-test-dex,oat-test-dex,$(dir),$(ART_TEST_OUT),$(2ND_ART_TEST_OUT))))
+$(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT))))
+$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call build-art-test-dex,oat-test-dex,$(dir),$(ART_TEST_OUT))))
 
 # Used outside the art project to get a list of the current tests
 ART_TEST_DEX_MAKE_TARGETS := $(addprefix art-test-dex-, $(TEST_DEX_DIRECTORIES))
 ART_TEST_OAT_MAKE_TARGETS := $(addprefix oat-test-dex-, $(TEST_OAT_DIRECTORIES))
 
-# Rules to explicitly create 2nd-arch test directories, as we use a "cp" for them
-# instead of BUILD_JAVA_LIBRARY
-ifneq ($(2ND_ART_NATIVETEST_OUT),)
-$(2ND_ART_NATIVETEST_OUT):
-	$(hide) mkdir -p $@
-endif
-
-ifneq ($(2ND_ART_TEST_OUT),)
-$(2ND_ART_TEST_OUT):
-	$(hide) mkdir -p $@
-endif
-
 ########################################################################
 
 ART_TEST_TARGET_OAT_TARGETS$(ART_PHONY_TEST_TARGET_SUFFIX) :=
@@ -133,12 +113,12 @@
 
 define declare-test-art-oat-targets-impl
 .PHONY: test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): $($(2)ART_TEST_OUT)/oat-test-dex-$(1).jar test-art-target-sync
-	adb shell touch $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)
-	adb shell rm $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)
-	adb shell sh -c "/system/bin/dalvikvm$($(2)ART_TARGET_BINARY_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$($(2)ART_TEST_DIR)/core.art -classpath $($(2)ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$($(2)ART_TEST_DIR) $(1) && touch $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)"
-	$(hide) (adb pull $($(2)ART_TEST_DIR)/test-art-target-oat-$(1) /tmp/ && echo test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) PASSED) || (echo test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) FAILED && exit 1)
-	$(hide) rm /tmp/test-art-target-oat-$(1)
+test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): $(ART_TEST_OUT)/oat-test-dex-$(1).jar test-art-target-sync
+	adb shell touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@
+	adb shell rm $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@
+	adb shell sh -c "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(ART_TEST_DIR)/core.art -classpath $(ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$(ART_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) && touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@"
+	$(hide) (adb pull $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
+	$(hide) rm /tmp/$$@
 endef
 
 # $(1): directory
@@ -154,26 +134,26 @@
   endif
   $(call declare-test-art-oat-targets-impl,$(1),)
 
-$(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
-	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_OUT) --dex-file=$$(realpath $$<) --oat-file=$$(realpath $(HOST_OUT_JAVA_LIBRARIES))/oat-test-dex-$(1).odex --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
+$(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
+	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_LOCATION) --dex-file=$$(realpath $$<) --oat-file=$$@ --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
 
 .PHONY: test-art-host-oat-default-$(1)
-test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-default-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-default-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
 	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$$(realpath $(HOST_CORE_IMG_OUT)) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
           && echo test-art-host-oat-default-$(1) PASSED || (echo test-art-host-oat-default-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-default-$(1)
 
 .PHONY: test-art-host-oat-interpreter-$(1)
-test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-interpreter-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-interpreter-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
 	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$$(realpath $(HOST_CORE_IMG_OUT)) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
           && echo test-art-host-oat-interpreter-$(1) PASSED || (echo test-art-host-oat-interpreter-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-interpreter-$(1)
 
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index d8a0eef..48a6c61 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -15,8 +15,8 @@
  */
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index d230ddd..3cf2d0b 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -15,8 +15,8 @@
  */
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "gc_map.h"
 #include "mirror/art_method.h"
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
index a844e82..d95559f 100755
--- a/test/etc/host-run-test-jar
+++ b/test/etc/host-run-test-jar
@@ -17,7 +17,7 @@
 INVOKE_WITH=""
 DEV_MODE="n"
 QUIET="n"
-COMPILER_OPTIONS=""
+FLAGS=""
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
@@ -69,7 +69,12 @@
     elif [ "x$1" = "x-Xcompiler-option" ]; then
         shift
         option="$1"
-        COMPILER_OPTIONS="${COMPILER_OPTIONS} -Xcompiler-option $option"
+        FLAGS="${FLAGS} -Xcompiler-option $option"
+        shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
         shift
     elif [ "x$1" = "x--" ]; then
         shift
@@ -127,4 +132,4 @@
 fi
 
 cd $ANDROID_BUILD_TOP
-$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $COMPILER_OPTIONS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
+$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $FLAGS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index 6cf7998..b090c33 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -38,6 +38,11 @@
         option="$1"
         FLAGS="${FLAGS} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
+        shift
     elif [ "x$1" = "x--boot" ]; then
         shift
         BOOT_OPT="$1"
diff --git a/test/run-test b/test/run-test
index 1c5904f..34b06cc 100755
--- a/test/run-test
+++ b/test/run-test
@@ -99,6 +99,11 @@
         option="$1"
         run_args="${run_args} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        run_args="${run_args} --runtime-option $option"
+        shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
@@ -190,7 +195,7 @@
         fi
         run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
     else
-        run_args="${run_args} --boot -Ximage:/data/art-test${suffix64}/core.art"
+        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
     fi
 fi
 
@@ -234,6 +239,7 @@
         echo "  Runtime Options:"
         echo "    -O                   Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option    Pass an option to the compiler."
+        echo "    -runtime-option      Pass an option to the runtime."
         echo "    --debug              Wait for a debugger to attach."
         echo "    --gdb                Run under gdb; incompatible with some tests."
         echo "    --build-only         Build test files only (off by default)."