resolve merge conflicts of 021c143 to nyc-mr1-dev-plus-aosp

Change-Id: Id393568264e95ba50ffc04f75131e8cda27ee15e
diff --git a/Android.mk b/Android.mk
index bb1334a..4dc84c4 100644
--- a/Android.mk
+++ b/Android.mk
@@ -98,29 +98,13 @@
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
 ART_HOST_DEPENDENCIES := \
-	$(ART_HOST_EXECUTABLES) \
-	$(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar \
-	$(HOST_OUT_JAVA_LIBRARIES)/core-oj-hostdex.jar \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_DEX_DEPENDENCIES) \
+  $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES)
 ART_TARGET_DEPENDENCIES := \
-	$(ART_TARGET_EXECUTABLES) \
-	$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
-	$(TARGET_OUT_JAVA_LIBRARIES)/core-oj.jar \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
-ifdef TARGET_2ND_ARCH
-ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
-ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so
-ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
-endif
-ifdef HOST_2ND_ARCH
-ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libjavacore.so
-ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdk.so
-ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
-endif
+  $(ART_TARGET_EXECUTABLES) \
+  $(ART_TARGET_DEX_DEPENDENCIES) \
+  $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES)
 
 ########################################################################
 # test rules
@@ -360,6 +344,19 @@
 
 endif  # art_test_bother
 
+# Valgrind.
+.PHONY: valgrind-test-art-target
+valgrind-test-art-target: valgrind-test-art-target-gtest
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target32
+valgrind-test-art-target32: valgrind-test-art-target-gtest32
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target64
+valgrind-test-art-target64: valgrind-test-art-target-gtest64
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
 ########################################################################
 # oat-target and oat-target-sync rules
 
@@ -559,6 +556,7 @@
 TEST_ART_TARGET_SYNC_DEPS :=
 
 include $(art_path)/runtime/openjdkjvm/Android.mk
+include $(art_path)/runtime/openjdkjvmti/Android.mk
 
 # Helper target that depends on boot image creation.
 #
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index a4a603a..2360bcc 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -45,7 +45,7 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
@@ -56,8 +56,8 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
+    LOCAL_LDLIBS := -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/benchmark/jni-perf/src/JniPerfBenchmark.java b/benchmark/jni-perf/src/JniPerfBenchmark.java
index b1b21ce..1e7cc2b 100644
--- a/benchmark/jni-perf/src/JniPerfBenchmark.java
+++ b/benchmark/jni-perf/src/JniPerfBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JniPerfBenchmark extends SimpleBenchmark {
+public class JniPerfBenchmark {
   private static final String MSG = "ABCDE";
 
   native void perfJniEmptyCall();
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
index f4c059c..90a53b3 100644
--- a/benchmark/jobject-benchmark/src/JObjectBenchmark.java
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JObjectBenchmark extends SimpleBenchmark {
+public class JObjectBenchmark {
   public JObjectBenchmark() {
     // Make sure to link methods before benchmark starts.
     System.loadLibrary("artbenchmark");
diff --git a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
index be276fe..0ad9c36 100644
--- a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
+++ b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark; 
-
-public class ScopedPrimitiveArrayBenchmark extends SimpleBenchmark {
+public class ScopedPrimitiveArrayBenchmark {
   // Measure adds the first and last element of the array by using ScopedPrimitiveArray.
   static native long measureByteArray(int reps, byte[] arr);
   static native long measureShortArray(int reps, short[] arr);
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 2294ddb..c8e3654 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -34,7 +34,7 @@
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
 
-# Set this to change what opt level Art is built at.
+# Set this to change what opt level ART is built at.
 ART_DEBUG_OPT_FLAG ?= -O2
 ART_NDEBUG_OPT_FLAG ?= -O3
 
@@ -76,128 +76,13 @@
 endif
 
 #
-# Used to enable JIT
-#
-ART_JIT := false
-ifneq ($(wildcard art/JIT_ART),)
-$(info Enabling ART_JIT because of existence of art/JIT_ART)
-ART_JIT := true
-endif
-ifeq ($(WITH_ART_JIT), true)
-ART_JIT := true
-endif
-
-#
 # Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS.
 #
 ART_DEFAULT_GC_TYPE ?= CMS
 art_default_gc_type_cflags := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE)
 
-ART_HOST_CFLAGS :=
-ART_TARGET_CFLAGS :=
-
-ART_HOST_ASFLAGS :=
-ART_TARGET_ASFLAGS :=
-
-# Clang build support.
-
-# Host.
-ART_HOST_CLANG := false
-ifneq ($(WITHOUT_HOST_CLANG),true)
-  # By default, host builds use clang for better warnings.
-  ART_HOST_CLANG := true
-endif
-
-# Clang on the target. Target builds use GCC by default.
-ifneq ($(USE_CLANG_PLATFORM_BUILD),)
-ART_TARGET_CLANG := $(USE_CLANG_PLATFORM_BUILD)
-else
-ART_TARGET_CLANG := false
-endif
-ART_TARGET_CLANG_arm :=
-ART_TARGET_CLANG_arm64 :=
-ART_TARGET_CLANG_mips :=
-ART_TARGET_CLANG_mips64 :=
-ART_TARGET_CLANG_x86 :=
-ART_TARGET_CLANG_x86_64 :=
-
-define set-target-local-clang-vars
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    $(foreach arch,$(ART_TARGET_SUPPORTED_ARCH),
-      ifneq ($$(ART_TARGET_CLANG_$(arch)),)
-        LOCAL_CLANG_$(arch) := $$(ART_TARGET_CLANG_$(arch))
-      endif)
-endef
-
-ART_TARGET_CLANG_CFLAGS :=
-ART_TARGET_CLANG_CFLAGS_arm :=
-ART_TARGET_CLANG_CFLAGS_arm64 :=
-ART_TARGET_CLANG_CFLAGS_mips :=
-ART_TARGET_CLANG_CFLAGS_mips64 :=
-ART_TARGET_CLANG_CFLAGS_x86 :=
-ART_TARGET_CLANG_CFLAGS_x86_64 :=
-
-# Warn about thread safety violations with clang.
-art_clang_cflags := -Wthread-safety -Wthread-safety-negative
-
-# Warn if switch fallthroughs aren't annotated.
-art_clang_cflags += -Wimplicit-fallthrough
-
-# Enable float equality warnings.
-art_clang_cflags += -Wfloat-equal
-
-# Enable warning of converting ints to void*.
-art_clang_cflags += -Wint-to-void-pointer-cast
-
-# Enable warning of wrong unused annotations.
-art_clang_cflags += -Wused-but-marked-unused
-
-# Enable warning for deprecated language features.
-art_clang_cflags += -Wdeprecated
-
-# Enable warning for unreachable break & return.
-art_clang_cflags += -Wunreachable-code-break -Wunreachable-code-return
-
-# Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's
-# a pain.
-ifneq ($(HOST_OS),darwin)
-  art_clang_cflags += -Wmissing-noreturn
-endif
-
-
-# GCC-only warnings.
-art_gcc_cflags := -Wunused-but-set-parameter
-# Suggest const: too many false positives, but good for a trial run.
-#                  -Wsuggest-attribute=const
-# Useless casts: too many, as we need to be 32/64 agnostic, but the compiler knows.
-#                  -Wuseless-cast
-# Zero-as-null: Have to convert all NULL and "diagnostic ignore" all includes like libnativehelper
-# that are still stuck pre-C++11.
-#                  -Wzero-as-null-pointer-constant \
-# Suggest final: Have to move to a more recent GCC.
-#                  -Wsuggest-final-types
-
-ART_TARGET_CLANG_CFLAGS := $(art_clang_cflags)
-ifeq ($(ART_HOST_CLANG),true)
-  # Bug: 15446488. We don't omit the frame pointer to work around
-  # clang/libunwind bugs that cause SEGVs in run-test-004-ThreadStress.
-  ART_HOST_CFLAGS += $(art_clang_cflags) -fno-omit-frame-pointer
-else
-  ART_HOST_CFLAGS += $(art_gcc_cflags)
-endif
-ifneq ($(ART_TARGET_CLANG),true)
-  ART_TARGET_CFLAGS += $(art_gcc_cflags)
-else
-  # TODO: if we ever want to support GCC/Clang mix for multi-target products, this needs to be
-  #       split up.
-  ifeq ($(ART_TARGET_CLANG_$(TARGET_ARCH)),false)
-    ART_TARGET_CFLAGS += $(art_gcc_cflags)
-  endif
-endif
-
-# Clear local variables now their use has ended.
-art_clang_cflags :=
-art_gcc_cflags :=
+ART_HOST_CLANG := true
+ART_TARGET_CLANG := true
 
 ART_CPP_EXTENSION := .cc
 
@@ -215,8 +100,41 @@
 # Note: technically we only need this on device, but this avoids the duplication of the includes.
 ART_C_INCLUDES += bionic/libc/private
 
+art_cflags :=
+
+# Warn about thread safety violations with clang.
+art_cflags += -Wthread-safety -Wthread-safety-negative
+
+# Warn if switch fallthroughs aren't annotated.
+art_cflags += -Wimplicit-fallthrough
+
+# Enable float equality warnings.
+art_cflags += -Wfloat-equal
+
+# Enable warning of converting ints to void*.
+art_cflags += -Wint-to-void-pointer-cast
+
+# Enable warning of wrong unused annotations.
+art_cflags += -Wused-but-marked-unused
+
+# Enable warning for deprecated language features.
+art_cflags += -Wdeprecated
+
+# Enable warning for unreachable break & return.
+art_cflags += -Wunreachable-code-break -Wunreachable-code-return
+
+# Bug: http://b/29823425  Disable -Wconstant-conversion and
+# -Wundefined-var-template for Clang update to r271374
+art_cflags += -Wno-constant-conversion -Wno-undefined-var-template
+
+# Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's
+# a pain.
+ifneq ($(HOST_OS),darwin)
+  art_cflags += -Wmissing-noreturn
+endif
+
 # Base set of cflags used by all things ART.
-art_cflags := \
+art_cflags += \
   -fno-rtti \
   -std=gnu++11 \
   -ggdb3 \
@@ -234,24 +152,19 @@
 
 # The architectures the compiled tools are able to run on. Setting this to 'all' will cause all
 # architectures to be included.
-ART_TARGET_CODEGEN_ARCHS ?= all
+ART_TARGET_CODEGEN_ARCHS ?= svelte
 ART_HOST_CODEGEN_ARCHS ?= all
 
 ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
   ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
-  # We need to handle the fact that some compiler tests mix code from different architectures.
-  ART_TARGET_COMPILER_TESTS ?= true
 else
-  ART_TARGET_COMPILER_TESTS := false
   ifeq ($(ART_TARGET_CODEGEN_ARCHS),svelte)
     ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_ARCH_64) $(ART_TARGET_ARCH_32))
   endif
 endif
 ifeq ($(ART_HOST_CODEGEN_ARCHS),all)
   ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
-  ART_HOST_COMPILER_TESTS ?= true
 else
-  ART_HOST_COMPILER_TESTS := false
   ifeq ($(ART_HOST_CODEGEN_ARCHS),svelte)
     ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS) $(ART_HOST_ARCH_64) $(ART_HOST_ARCH_32))
   endif
@@ -296,8 +209,8 @@
 ifdef ART_IMT_SIZE
   art_cflags += -DIMT_SIZE=$(ART_IMT_SIZE)
 else
-  # Default is 64
-  art_cflags += -DIMT_SIZE=64
+  # Default is 43
+  art_cflags += -DIMT_SIZE=43
 endif
 
 ifeq ($(ART_HEAP_POISONING),true)
@@ -334,44 +247,111 @@
 art_debug_cflags := \
   $(ART_DEBUG_OPT_FLAG) \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
+  -DVIXL_DEBUG \
   -UNDEBUG
 
+# Assembler flags for non-debug ART and ART tools.
+art_non_debug_asflags :=
+
+# Assembler flags for debug ART and ART tools.
+art_debug_asflags := -UNDEBUG
+
 art_host_non_debug_cflags := $(art_non_debug_cflags)
 art_target_non_debug_cflags := $(art_non_debug_cflags)
 
+###
+# Frame size
+###
+
+# Size of the stack-overflow gap.
+ART_STACK_OVERFLOW_GAP_arm := 8192
+ART_STACK_OVERFLOW_GAP_arm64 := 8192
+ART_STACK_OVERFLOW_GAP_mips := 16384
+ART_STACK_OVERFLOW_GAP_mips64 := 16384
+ART_STACK_OVERFLOW_GAP_x86 := 8192
+ART_STACK_OVERFLOW_GAP_x86_64 := 8192
+ART_COMMON_STACK_OVERFLOW_DEFINES := \
+  -DART_STACK_OVERFLOW_GAP_arm=$(ART_STACK_OVERFLOW_GAP_arm) \
+  -DART_STACK_OVERFLOW_GAP_arm64=$(ART_STACK_OVERFLOW_GAP_arm64) \
+  -DART_STACK_OVERFLOW_GAP_mips=$(ART_STACK_OVERFLOW_GAP_mips) \
+  -DART_STACK_OVERFLOW_GAP_mips64=$(ART_STACK_OVERFLOW_GAP_mips64) \
+  -DART_STACK_OVERFLOW_GAP_x86=$(ART_STACK_OVERFLOW_GAP_x86) \
+  -DART_STACK_OVERFLOW_GAP_x86_64=$(ART_STACK_OVERFLOW_GAP_x86_64) \
+
+# Keep these as small as possible. We have separate values as we have some host vs target
+# specific code (and previously GCC vs Clang).
+ART_HOST_FRAME_SIZE_LIMIT := 1736
+ART_TARGET_FRAME_SIZE_LIMIT := 1736
+
+# Frame size adaptations for instrumented builds.
+ifdef SANITIZE_TARGET
+  ART_TARGET_FRAME_SIZE_LIMIT := 6400
+endif
+
+# Add frame-size checks for non-debug builds.
 ifeq ($(HOST_OS),linux)
-  # Larger frame-size for host clang builds today
   ifneq ($(ART_COVERAGE),true)
     ifneq ($(NATIVE_COVERAGE),true)
-      art_host_non_debug_cflags += -Wframe-larger-than=2700
-      ifdef SANITIZE_TARGET
-        art_target_non_debug_cflags += -Wframe-larger-than=6400
-      else
-        art_target_non_debug_cflags += -Wframe-larger-than=1728
-      endif
+      art_host_non_debug_cflags += -Wframe-larger-than=$(ART_HOST_FRAME_SIZE_LIMIT)
+      art_target_non_debug_cflags += -Wframe-larger-than=$(ART_TARGET_FRAME_SIZE_LIMIT)
     endif
   endif
 endif
 
+
+ART_HOST_CFLAGS := $(art_cflags)
+ART_TARGET_CFLAGS := $(art_cflags)
+
+ART_HOST_ASFLAGS := $(art_asflags)
+ART_TARGET_ASFLAGS := $(art_asflags)
+
+# Bug: 15446488. We don't omit the frame pointer to work around
+# clang/libunwind bugs that cause SEGVs in run-test-004-ThreadStress.
+ART_HOST_CFLAGS += -fno-omit-frame-pointer
+
 ifndef LIBART_IMG_HOST_BASE_ADDRESS
   $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
 endif
-ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
-ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
-ART_HOST_ASFLAGS += $(art_asflags)
+ART_HOST_CFLAGS += -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
+ART_HOST_CFLAGS += $(art_host_cflags)
+
+ART_HOST_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_HOST_FRAME_SIZE_LIMIT) \
+                   $(ART_COMMON_STACK_OVERFLOW_DEFINES)
+
 
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
-ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+
+ART_TARGET_CFLAGS += -DART_TARGET \
+                     -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS) \
+
+ART_TARGET_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_TARGET_FRAME_SIZE_LIMIT) \
+                     $(ART_COMMON_STACK_OVERFLOW_DEFINES)
+
+ifeq ($(ART_TARGET_LINUX),true)
+# Setting ART_TARGET_LINUX to true compiles art/ assuming that the target device
+# will be running linux rather than android.
+ART_TARGET_CFLAGS += -DART_TARGET_LINUX
+else
+# The ART_TARGET_ANDROID macro is passed to target builds, which check
+# against it instead of against __ANDROID__ (which is provided by target
+# toolchains).
+ART_TARGET_CFLAGS += -DART_TARGET_ANDROID
+endif
+
 ART_TARGET_CFLAGS += $(art_target_cflags)
-ART_TARGET_ASFLAGS += $(art_asflags)
 
 ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags)
 ART_TARGET_NON_DEBUG_CFLAGS := $(art_target_non_debug_cflags)
 ART_HOST_DEBUG_CFLAGS := $(art_debug_cflags)
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+ART_HOST_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_TARGET_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_HOST_DEBUG_ASFLAGS := $(art_debug_asflags)
+ART_TARGET_DEBUG_ASFLAGS := $(art_debug_asflags)
+
 ifndef LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA
   LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA=-0x1000000
 endif
@@ -400,35 +380,28 @@
 art_target_cflags :=
 art_debug_cflags :=
 art_non_debug_cflags :=
+art_debug_asflags :=
+art_non_debug_asflags :=
 art_host_non_debug_cflags :=
 art_target_non_debug_cflags :=
 art_default_gc_type_cflags :=
 
-ART_HOST_LDLIBS :=
-ifneq ($(ART_HOST_CLANG),true)
-  # GCC lacks libc++ assumed atomic operations, grab via libatomic.
-  ART_HOST_LDLIBS += -latomic
-endif
-
 ART_TARGET_LDFLAGS :=
 
 # $(1): ndebug_or_debug
 define set-target-local-cflags-vars
   LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
-  LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
   LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
   LOCAL_LDFLAGS += $(ART_TARGET_LDFLAGS)
   art_target_cflags_ndebug_or_debug := $(1)
   ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
     LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_DEBUG_ASFLAGS)
   else
     LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_NON_DEBUG_ASFLAGS)
   endif
 
-  LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS)
-  $(foreach arch,$(ART_TARGET_SUPPORTED_ARCH),
-    LOCAL_CLANG_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch)))
-
   # Clear locally used variables.
   art_target_cflags_ndebug_or_debug :=
 endef
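
(Aside, not part of the patch: set-target-local-cflags-vars above now also routes the new ART_TARGET_DEBUG_ASFLAGS / ART_TARGET_NON_DEBUG_ASFLAGS selection through its $(1) = ndebug_or_debug argument. A minimal sketch of that define-plus-eval pattern, using placeholder DEMO_ variable names rather than the real build variables:

    DEMO_DEBUG_ASFLAGS := -UNDEBUG
    DEMO_NON_DEBUG_ASFLAGS :=

    # $(1): ndebug_or_debug
    define demo-set-asflags
      ifeq ($(1),debug)
        DEMO_ASFLAGS += $$(DEMO_DEBUG_ASFLAGS)
      else
        DEMO_ASFLAGS += $$(DEMO_NON_DEBUG_ASFLAGS)
      endif
    endef

    $(eval $(call demo-set-asflags,debug))

    demo: ; @echo "DEMO_ASFLAGS = $(DEMO_ASFLAGS)"

Calling it with ndebug instead appends the (empty) non-debug set, mirroring how each module in Android.executable.mk and Android.gtest.mk picks its debug or non-debug flag set.)
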
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index 7be1894..86bb475 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -38,7 +38,7 @@
 ifneq ($(TMPDIR),)
 ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
 else
-ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
 endif
 
 # core.oat location on the device.
@@ -99,4 +99,18 @@
 TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
 TARGET_JACK_CLASSPATH              := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
 
+ART_HOST_DEX_DEPENDENCIES := $(foreach jar,$(HOST_CORE_JARS),$(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
+ART_TARGET_DEX_DEPENDENCIES := $(foreach jar,$(TARGET_CORE_JARS),$(TARGET_OUT_JAVA_LIBRARIES)/$(jar).jar)
+
+ART_CORE_SHARED_LIBRARIES := libjavacore libopenjdk libopenjdkjvm libopenjdkjvmti
+ART_HOST_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(ART_HOST_OUT_SHARED_LIBRARIES)/$(lib)$(ART_HOST_SHLIB_EXTENSION))
+ifdef HOST_2ND_ARCH
+ART_HOST_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_HOST_OUT_SHARED_LIBRARIES)/$(lib).so)
+endif
+
+ART_TARGET_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+ifdef TARGET_2ND_ARCH
+ART_TARGET_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+endif
+
 endif # ART_ANDROID_COMMON_PATH_MK
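
(Aside, not part of the patch: the new ART_HOST_DEX_DEPENDENCIES and ART_*_SHARED_LIBRARY_DEPENDENCIES variables above replace the hand-written per-jar and per-library lists in the top-level Android.mk with $(foreach) expansions over HOST_CORE_JARS / TARGET_CORE_JARS and ART_CORE_SHARED_LIBRARIES. A minimal standalone sketch of the expansion; the values assigned here are illustrative only, chosen to match the jars the old Android.mk listed by hand:

    # Illustrative values; the real lists are defined elsewhere in the build.
    HOST_CORE_JARS := core-oj-hostdex core-libart-hostdex
    HOST_OUT_JAVA_LIBRARIES := out/host/linux-x86/framework

    # Same shape as the ART_HOST_DEX_DEPENDENCIES assignment above.
    ART_HOST_DEX_DEPENDENCIES := $(foreach jar,$(HOST_CORE_JARS),$(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)

    demo: ; @echo $(ART_HOST_DEX_DEPENDENCIES)

Running make demo on that fragment prints out/host/linux-x86/framework/core-oj-hostdex.jar and .../core-libart-hostdex.jar, the same two host jars that the removed Android.mk lines spelled out explicitly.)
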
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index df7df26..93e310e 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -57,6 +57,9 @@
 # Do you want optimizing compiler tests run?
 ART_TEST_OPTIMIZING ?= true
 
+# Do you want to test the optimizing compiler with graph coloring register allocation?
+ART_TEST_OPTIMIZING_GRAPH_COLOR ?= $(ART_TEST_FULL)
+
 # Do we want to test a PIC-compiled core image?
 ART_TEST_PIC_IMAGE ?= $(ART_TEST_FULL)
 
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index a06f45a..03791f3 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -19,11 +19,15 @@
 ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
 ART_CPPLINT_FLAGS := --quiet
+ART_CPPLINT_INGORED := \
+    runtime/elf.h \
+    runtime/openjdkjvmti/jvmti.h
+
 # This:
 #  1) Gets a list of all .h & .cc files in the art directory.
 #  2) Prepends 'art/' to each of them to make the full name.
 #  3) removes art/runtime/elf.h from the list.
-ART_CPPLINT_SRC := $(filter-out $(LOCAL_PATH)/runtime/elf.h, $(addprefix $(LOCAL_PATH)/, $(call all-subdir-named-files,*.h) $(call all-subdir-named-files,*$(ART_CPP_EXTENSION))))
+ART_CPPLINT_SRC := $(filter-out $(patsubst %,$(LOCAL_PATH)/%,$(ART_CPPLINT_INGORED)), $(addprefix $(LOCAL_PATH)/, $(call all-subdir-named-files,*.h) $(call all-subdir-named-files,*$(ART_CPP_EXTENSION))))
 
 # "mm cpplint-art" to verify we aren't regressing
 .PHONY: cpplint-art
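
(Aside, not part of the patch: the ART_CPPLINT_SRC change above generalizes the old single filter-out of runtime/elf.h into a list-driven exclusion. A minimal sketch of the same patsubst + filter-out pattern; the LOCAL_PATH value and the ALL_FILES list are made up for illustration, while the ignored entries are the two from the patch:

    LOCAL_PATH := art
    IGNORED := runtime/elf.h runtime/openjdkjvmti/jvmti.h
    ALL_FILES := runtime/elf.h runtime/thread.cc runtime/openjdkjvmti/jvmti.h compiler/compiler.cc

    # Prefix every file with LOCAL_PATH, then drop the prefixed ignore list.
    SRC := $(filter-out $(patsubst %,$(LOCAL_PATH)/%,$(IGNORED)), $(addprefix $(LOCAL_PATH)/,$(ALL_FILES)))

    demo: ; @echo $(SRC)   # prints art/runtime/thread.cc art/compiler/compiler.cc

Adding a new header to the ignore list is then a one-line change instead of another hand-written filter-out.)
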
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index cb6d340..2db16af 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -80,18 +80,19 @@
   endif
 
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(6))
     LOCAL_SHARED_LIBRARIES += libdl
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     ifeq ($$(art_static_or_shared),static)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index c911a17..c538c4f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -71,7 +71,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
-ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
@@ -94,17 +94,17 @@
 
 ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
-  $(HOST_CORE_IMAGE_default_no-pic_32)
+  $(HOST_CORE_IMAGE_default_no-pic_32) \
+  $(HOST_OUT_EXECUTABLES)/patchoatd
 ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
-  $(TARGET_CORE_IMAGE_default_no-pic_32)
+  $(TARGET_CORE_IMAGE_default_no-pic_32) \
+  $(TARGET_OUT_EXECUTABLES)/patchoatd
 
 ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS) \
-   $(HOST_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS) \
-   $(TARGET_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
 
 
 ART_GTEST_dex2oat_test_HOST_DEPS := \
@@ -234,8 +234,6 @@
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
   runtime/jit/profile_compilation_info_test.cc \
-  runtime/lambda/closure_test.cc \
-  runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
   runtime/mem_map_test.cc \
   runtime/memory_region_test.cc \
@@ -292,19 +290,16 @@
   compiler/optimizing/suspend_check_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/intrusive_forward_list_test.cc \
+  compiler/utils/string_reference_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
+  compiler/utils/transform_array_ref_test.cc \
+  compiler/utils/transform_iterator_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_all := \
   compiler/jni/jni_cfi_test.cc \
   compiler/optimizing/codegen_test.cc \
-  compiler/optimizing/constant_folding_test.cc \
-  compiler/optimizing/dead_code_elimination_test.cc \
-  compiler/optimizing/linearize_test.cc \
-  compiler/optimizing/liveness_test.cc \
-  compiler/optimizing/live_ranges_test.cc \
   compiler/optimizing/optimizing_cfi_test.cc \
-  compiler/optimizing/register_allocator_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_arm := \
   compiler/linker/arm/relative_patcher_thumb2_test.cc \
@@ -315,6 +310,8 @@
   compiler/utils/arm64/managed_register_arm64_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips := \
+  compiler/linker/mips/relative_patcher_mips_test.cc \
+  compiler/linker/mips/relative_patcher_mips32r6_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \
 
@@ -322,6 +319,16 @@
   compiler/linker/x86/relative_patcher_x86_test.cc \
   compiler/utils/x86/managed_register_x86_test.cc \
 
+# These tests are testing architecture-independent functionality, but happen
+# to use x86 codegen as part of the test.
+COMPILER_GTEST_COMMON_SRC_FILES_x86 += \
+  compiler/optimizing/constant_folding_test.cc \
+  compiler/optimizing/dead_code_elimination_test.cc \
+  compiler/optimizing/linearize_test.cc \
+  compiler/optimizing/live_ranges_test.cc \
+  compiler/optimizing/liveness_test.cc \
+  compiler/optimizing/register_allocator_test.cc \
+
 COMPILER_GTEST_COMMON_SRC_FILES_x86_64 := \
   compiler/linker/x86_64/relative_patcher_x86_64_test.cc \
 
@@ -356,9 +363,7 @@
   $(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \
 
 $(foreach arch,$(ART_TARGET_CODEGEN_ARCHS),$(eval COMPILER_GTEST_TARGET_SRC_FILES += $$(COMPILER_GTEST_TARGET_SRC_FILES_$(arch))))
-ifeq (true,$(ART_TARGET_COMPILER_TESTS))
-  COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
-endif
+COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
@@ -378,6 +383,7 @@
 COMPILER_GTEST_HOST_SRC_FILES_mips := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
   compiler/utils/mips/assembler_mips_test.cc \
+  compiler/utils/mips/assembler_mips32r6_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
@@ -392,9 +398,7 @@
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
 $(foreach arch,$(ART_HOST_CODEGEN_ARCHS),$(eval COMPILER_GTEST_HOST_SRC_FILES += $$(COMPILER_GTEST_HOST_SRC_FILES_$(arch))))
-ifeq (true,$(ART_HOST_COMPILER_TESTS))
-  COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
-endif
+COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
 
 ART_TEST_CFLAGS :=
 
@@ -408,9 +412,9 @@
 LOCAL_STATIC_LIBRARIES += libgtest
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
-$(eval $(call set-target-local-clang-vars))
+$(eval LOCAL_CLANG := $(ART_TARGET_CLANG))
 $(eval $(call set-target-local-cflags-vars,debug))
-LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn # gtest issue
+LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn # gtest issue
 include $(BUILD_SHARED_LIBRARY)
 
 include $(CLEAR_VARS)
@@ -426,7 +430,7 @@
 LOCAL_LDLIBS += -ldl -lpthread
 LOCAL_MULTILIB := both
 LOCAL_CLANG := $(ART_HOST_CLANG)
-LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
 include $(BUILD_HOST_SHARED_LIBRARY)
@@ -441,6 +445,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_TEST_HOST_GTEST_DEPENDENCIES :=
 
 ART_GTEST_TARGET_ANDROID_ROOT := '/system'
@@ -448,6 +455,28 @@
   ART_GTEST_TARGET_ANDROID_ROOT := $(ART_TEST_ANDROID_ROOT)
 endif
 
+ART_VALGRIND_TARGET_DEPENDENCIES := \
+  $(TARGET_OUT_EXECUTABLES)/valgrind \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/default.supp
+
+ifdef TARGET_2ND_ARCH
+ART_VALGRIND_TARGET_DEPENDENCIES += \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_2ND_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_2ND_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_2ND_ARCH)-linux.so
+endif
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := valgrind-target-suppressions.txt
+LOCAL_MODULE_CLASS := ETC
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := test/valgrind-target-suppressions.txt
+LOCAL_MODULE_PATH := $(ART_TARGET_TEST_OUT)
+include $(BUILD_PREBUILT)
+
 # Define a make rule for a target device gtest.
 # $(1): gtest name - the name of the test we're building such as leb128_test.
 # $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
@@ -464,7 +493,8 @@
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdkd.so \
     $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar \
-    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar \
+    $$(ART_TARGET_TEST_OUT)/valgrind-target-suppressions.txt
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): test-art-target-sync
@@ -483,7 +513,27 @@
   ART_TEST_TARGET_GTEST_RULES += $$(gtest_rule)
   ART_TEST_TARGET_GTEST_$(1)_RULES += $$(gtest_rule)
 
+.PHONY: valgrind-$$(gtest_rule)
+valgrind-$$(gtest_rule): $(ART_VALGRIND_TARGET_DEPENDENCIES) test-art-target-sync
+	$(hide) adb shell touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell chmod 755 $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1)
+	$(hide) $$(call ART_TEST_SKIP,$$@) && \
+	  (adb shell "$(GCOV_ENV) LD_LIBRARY_PATH=$(3) ANDROID_ROOT=$(ART_GTEST_TARGET_ANDROID_ROOT) \
+	    valgrind --leak-check=full --error-exitcode=1 --workaround-gcc296-bugs=yes \
+	    --suppressions=$(ART_TARGET_TEST_DIR)/valgrind-target-suppressions.txt \
+	    $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1) && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID" \
+	  && (adb pull $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID /tmp/ \
+	      && $$(call ART_TEST_PASSED,$$@)) \
+	  || $$(call ART_TEST_FAILED,$$@))
+	$(hide) rm -f /tmp/$$@-$$$$PPID
+
+  ART_TEST_TARGET_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_$(1)_RULES += valgrind-$$(gtest_rule)
+
   # Clear locally defined variables.
+  valgrind_gtest_rule :=
   gtest_rule :=
 endef  # define-art-gtest-rule-target
 
@@ -582,13 +632,13 @@
 
   LOCAL_CFLAGS := $$(ART_TEST_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    $$(eval $$(call set-target-local-clang-vars))
+    $$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG))
     $$(eval $$(call set-target-local-cflags-vars,debug))
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld-arm64
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
     include $$(BUILD_EXECUTABLE)
     library_path :=
     2nd_library_path :=
@@ -606,6 +656,7 @@
     endif
 
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
     ifdef TARGET_2ND_ARCH
       $$(eval $$(call define-art-gtest-rule-target,$$(art_gtest_name),2ND_,$$(2nd_library_path)))
     endif
@@ -616,19 +667,24 @@
 test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES)
 	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
 
+.PHONY: valgrind-test-art-target-gtest-$$(art_gtest_name)
+valgrind-test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES)
+	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
+
     # Clear locally defined variables.
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
-    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
+    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS)
+    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld-arm64
+    LOCAL_LDLIBS := -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     LOCAL_MODULE_STEM_32 := $$(art_gtest_name)32
     LOCAL_MODULE_STEM_64 := $$(art_gtest_name)64
-    LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    LOCAL_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
     include $$(BUILD_HOST_EXECUTABLE)
 
     ART_TEST_HOST_GTEST_$$(art_gtest_name)_RULES :=
@@ -700,9 +756,6 @@
 
   rule_name := $(3)test-art-$(1)-gtest$(4)
   ifeq ($(3),valgrind-)
-    ifneq ($(1),host)
-      $$(error valgrind tests only wired up for the host)
-    endif
     dependencies := $$(ART_TEST_$(2)_VALGRIND_GTEST$(4)_RULES)
   else
     dependencies := $$(ART_TEST_$(2)_GTEST$(4)_RULES)
@@ -718,9 +771,12 @@
 endef  # define-test-art-gtest-combination
 
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,))
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(ART_PHONY_TEST_TARGET_SUFFIX)))
 ifdef TARGET_2ND_ARCH
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
 endif
 $(eval $(call define-test-art-gtest-combination,host,HOST,,))
 $(eval $(call define-test-art-gtest-combination,host,HOST,valgrind-,))
@@ -752,6 +808,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_GTEST_TARGET_ANDROID_ROOT :=
 ART_GTEST_class_linker_test_DEX_DEPS :=
 ART_GTEST_compiler_driver_test_DEX_DEPS :=
@@ -774,6 +833,7 @@
 ART_GTEST_transaction_test_DEX_DEPS :=
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS :=
 ART_VALGRIND_DEPENDENCIES :=
+ART_VALGRIND_TARGET_DEPENDENCIES :=
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_TARGET_GTEST_$(dir)_DEX :=))
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_HOST_GTEST_$(dir)_DEX :=))
 ART_TEST_HOST_GTEST_MainStripped_DEX :=
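
(Aside, not part of the patch: define-art-gtest-rule-target above now stamps out a valgrind-<test> rule next to each plain gtest rule, and the per-test rules are generated with the usual $(foreach ...)/$(eval $(call ...)) idiom. A minimal sketch of that idiom with placeholder test names; the echo bodies stand in for the real adb/valgrind recipes:

    DEMO_TESTS := leb128_test mem_map_test   # placeholder names

    # $(1): test name
    define demo-define-test-rule
    .PHONY: demo-run-$(1) demo-valgrind-$(1)
    demo-run-$(1): ; @echo "would run $(1)"
    demo-valgrind-$(1): ; @echo "would run $(1) under valgrind"
    endef

    $(foreach t,$(DEMO_TESTS),$(eval $(call demo-define-test-rule,$(t))))

    # Aggregate target collects the generated per-test rules, much as
    # ART_TEST_TARGET_VALGRIND_GTEST_RULES collects valgrind-$(gtest_rule) above.
    demo-valgrind-all: $(foreach t,$(DEMO_TESTS),demo-valgrind-$(t))

The valgrind-test-art-target* phony targets added to the top-level Android.mk then sit on top of exactly this kind of aggregated rule list.)
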
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 7c53e01..5809dcd 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -30,18 +30,15 @@
   bool UsuallyEquals(double expected, double actual);
 
   // This has a gtest dependency, which is why it's in the gtest only.
-  bool operator==(const TestProfilerOptions& lhs, const TestProfilerOptions& rhs) {
+  bool operator==(const ProfileSaverOptions& lhs, const ProfileSaverOptions& rhs) {
     return lhs.enabled_ == rhs.enabled_ &&
-        lhs.output_file_name_ == rhs.output_file_name_ &&
-        lhs.period_s_ == rhs.period_s_ &&
-        lhs.duration_s_ == rhs.duration_s_ &&
-        lhs.interval_us_ == rhs.interval_us_ &&
-        UsuallyEquals(lhs.backoff_coefficient_, rhs.backoff_coefficient_) &&
-        UsuallyEquals(lhs.start_immediately_, rhs.start_immediately_) &&
-        UsuallyEquals(lhs.top_k_threshold_, rhs.top_k_threshold_) &&
-        UsuallyEquals(lhs.top_k_change_threshold_, rhs.top_k_change_threshold_) &&
-        lhs.profile_type_ == rhs.profile_type_ &&
-        lhs.max_stack_depth_ == rhs.max_stack_depth_;
+        lhs.min_save_period_ms_ == rhs.min_save_period_ms_ &&
+        lhs.save_resolved_classes_delay_ms_ == rhs.save_resolved_classes_delay_ms_ &&
+        lhs.startup_method_samples_ == rhs.startup_method_samples_ &&
+        lhs.min_methods_to_save_ == rhs.min_methods_to_save_ &&
+        lhs.min_classes_to_save_ == rhs.min_classes_to_save_ &&
+        lhs.min_notification_before_wake_ == rhs.min_notification_before_wake_ &&
+        lhs.max_notification_before_wake_ == rhs.max_notification_before_wake_;
   }
 
   bool UsuallyEquals(double expected, double actual) {
@@ -476,68 +473,21 @@
 }  // TEST_F
 
 /*
-* -X-profile-*
+* -Xps-*
 */
-TEST_F(CmdlineParserTest, TestProfilerOptions) {
- /*
-  * Test successes
-  */
+TEST_F(CmdlineParserTest, ProfileSaverOptions) {
+  ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7);
 
-  {
-    TestProfilerOptions opt;
-    opt.enabled_ = true;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xenable-profiler",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt;
-    // also need to test 'enabled'
-    opt.output_file_name_ = "hello_world.txt";
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:hello_world.txt ",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    // also need to test 'enabled'
-    opt.output_file_name_ = "output.txt";
-    opt.period_s_ = 123u;
-    opt.duration_s_ = 456u;
-    opt.interval_us_ = 789u;
-    opt.backoff_coefficient_ = 2.0;
-    opt.start_immediately_ = true;
-    opt.top_k_threshold_ = 50.0;
-    opt.top_k_change_threshold_ = 60.0;
-    opt.profile_type_ = kProfilerMethod;
-    opt.max_stack_depth_ = 1337u;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:output.txt "
-                              "-Xprofile-period:123 "
-                              "-Xprofile-duration:456 "
-                              "-Xprofile-interval:789 "
-                              "-Xprofile-backoff:2.0 "
-                              "-Xprofile-start-immediately "
-                              "-Xprofile-top-k-threshold:50.0 "
-                              "-Xprofile-top-k-change-threshold:60.0 "
-                              "-Xprofile-type:method "
-                              "-Xprofile-max-stack-depth:1337",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    opt.profile_type_ = kProfilerBoundedStack;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-type:stack",
-                              M::ProfilerOpts);
-  }
+  EXPECT_SINGLE_PARSE_VALUE(opt,
+                            "-Xjitsaveprofilinginfo "
+                            "-Xps-min-save-period-ms:1 "
+                            "-Xps-save-resolved-classes-delay-ms:2 "
+                            "-Xps-startup-method-samples:3 "
+                            "-Xps-min-methods-to-save:4 "
+                            "-Xps-min-classes-to-save:5 "
+                            "-Xps-min-notification-before-wake:6 "
+                            "-Xps-max-notification-before-wake:7",
+                            M::ProfileSaverOpts);
 }  // TEST_F
 
 /* -Xexperimental:_ */
@@ -551,11 +501,6 @@
   EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kNone,
                             "-Xexperimental:none",
                             M::Experimental);
-
-  // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kLambdas,
-                            "-Xexperimental:lambdas",
-                            M::Experimental);
 }
 
 // -Xverify:_
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 4797540..b57383b 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -24,14 +24,16 @@
 
 // Includes for the types that are being specialized
 #include <string>
-#include "unit.h"
-#include "jdwp/jdwp.h"
 #include "base/logging.h"
 #include "base/time_utils.h"
 #include "experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
+#include "jdwp/jdwp.h"
+#include "jit/profile_saver_options.h"
+#include "plugin.h"
+#include "ti/agent.h"
+#include "unit.h"
 
 namespace art {
 
@@ -381,6 +383,38 @@
 };
 
 template <>
+struct CmdlineType<std::vector<Plugin>> : CmdlineTypeParser<std::vector<Plugin>> {
+  Result Parse(const std::string& args) {
+    assert(false && "Use AppendValues() for a Plugin vector type");
+    return Result::Failure("Unconditional failure: Plugin vector must be appended: " + args);
+  }
+
+  Result ParseAndAppend(const std::string& args,
+                        std::vector<Plugin>& existing_value) {
+    existing_value.push_back(Plugin::Create(args));
+    return Result::SuccessNoValue();
+  }
+
+  static const char* Name() { return "std::vector<Plugin>"; }
+};
+
+template <>
+struct CmdlineType<std::vector<ti::Agent>> : CmdlineTypeParser<std::vector<ti::Agent>> {
+  Result Parse(const std::string& args) {
+    assert(false && "Use AppendValues() for an Agent vector type");
+    return Result::Failure("Unconditional failure: Agent vector must be appended: " + args);
+  }
+
+  Result ParseAndAppend(const std::string& args,
+                        std::vector<ti::Agent>& existing_value) {
+    existing_value.push_back(ti::Agent::Create(args));
+    return Result::SuccessNoValue();
+  }
+
+  static const char* Name() { return "std::vector<ti::Agent>"; }
+};
+
+template <>
 struct CmdlineType<std::vector<std::string>> : CmdlineTypeParser<std::vector<std::string>> {
   Result Parse(const std::string& args) {
     assert(false && "Use AppendValues() for a string vector type");
@@ -462,7 +496,7 @@
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ =  kUseReadBarrier ?
+  gc::CollectorType collector_type_ = kUseReadBarrier ?
                                            // If RB is enabled (currently a build-time decision),
                                            // use CC as the default GC.
                                            gc::kCollectorTypeCC :
@@ -473,6 +507,7 @@
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
+  bool measure_ = kIsDebugBuild;
   bool gcstress_ = false;
 };
 
@@ -515,6 +550,8 @@
         xgc.gcstress_ = true;
       } else if (gc_option == "nogcstress") {
         xgc.gcstress_ = false;
+      } else if (gc_option == "measure") {
+        xgc.measure_ = true;
       } else if ((gc_option == "precise") ||
                  (gc_option == "noprecise") ||
                  (gc_option == "verifycardtable") ||
@@ -622,6 +659,8 @@
         log_verbosity.image = true;
       } else if (verbose_options[j] == "systrace-locks") {
         log_verbosity.systrace_lock_logging = true;
+      } else if (verbose_options[j] == "agents") {
+        log_verbosity.agents = true;
       } else {
         return Result::Usage(std::string("Unknown -verbose option ") + verbose_options[j]);
       }
@@ -633,84 +672,17 @@
   static const char* Name() { return "LogVerbosity"; }
 };
 
-// TODO: Replace with art::ProfilerOptions for the real thing.
-struct TestProfilerOptions {
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Destination file name where the profiling data will be saved into.
-  std::string output_file_name_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient to exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-
-  TestProfilerOptions() :
-    enabled_(false),
-    output_file_name_(),
-    period_s_(0),
-    duration_s_(0),
-    interval_us_(0),
-    backoff_coefficient_(0),
-    start_immediately_(0),
-    top_k_threshold_(0),
-    top_k_change_threshold_(0),
-    profile_type_(ProfileDataType::kProfilerMethod),
-    max_stack_depth_(0) {
-  }
-
-  TestProfilerOptions(const TestProfilerOptions&) = default;
-  TestProfilerOptions(TestProfilerOptions&&) = default;
-};
-
-static inline std::ostream& operator<<(std::ostream& stream, const TestProfilerOptions& options) {
-  stream << "TestProfilerOptions {" << std::endl;
-
-#define PRINT_TO_STREAM(field) \
-  stream << #field << ": '" << options.field << "'" << std::endl;
-
-  PRINT_TO_STREAM(enabled_);
-  PRINT_TO_STREAM(output_file_name_);
-  PRINT_TO_STREAM(period_s_);
-  PRINT_TO_STREAM(duration_s_);
-  PRINT_TO_STREAM(interval_us_);
-  PRINT_TO_STREAM(backoff_coefficient_);
-  PRINT_TO_STREAM(start_immediately_);
-  PRINT_TO_STREAM(top_k_threshold_);
-  PRINT_TO_STREAM(top_k_change_threshold_);
-  PRINT_TO_STREAM(profile_type_);
-  PRINT_TO_STREAM(max_stack_depth_);
-
-  stream << "}";
-
-  return stream;
-#undef PRINT_TO_STREAM
-}
-
 template <>
-struct CmdlineType<TestProfilerOptions> : CmdlineTypeParser<TestProfilerOptions> {
-  using Result = CmdlineParseResult<TestProfilerOptions>;
+struct CmdlineType<ProfileSaverOptions> : CmdlineTypeParser<ProfileSaverOptions> {
+  using Result = CmdlineParseResult<ProfileSaverOptions>;
 
  private:
   using StringResult = CmdlineParseResult<std::string>;
   using DoubleResult = CmdlineParseResult<double>;
 
   template <typename T>
-  static Result ParseInto(TestProfilerOptions& options,
-                          T TestProfilerOptions::*pField,
+  static Result ParseInto(ProfileSaverOptions& options,
+                          T ProfileSaverOptions::*pField,
                           CmdlineParseResult<T>&& result) {
     assert(pField != nullptr);
 
@@ -722,36 +694,6 @@
     return Result::CastError(result);
   }
 
-  template <typename T>
-  static Result ParseIntoRangeCheck(TestProfilerOptions& options,
-                                    T TestProfilerOptions::*pField,
-                                    CmdlineParseResult<T>&& result,
-                                    T min,
-                                    T max) {
-    if (result.IsSuccess()) {
-      const T& value = result.GetValue();
-
-      if (value < min || value > max) {
-        CmdlineParseResult<T> out_of_range = CmdlineParseResult<T>::OutOfRange(value, min, max);
-        return Result::CastError(out_of_range);
-      }
-    }
-
-    return ParseInto(options, pField, std::forward<CmdlineParseResult<T>>(result));
-  }
-
-  static StringResult ParseStringAfterChar(const std::string& s, char c) {
-    std::string parsed_value;
-
-    std::string::size_type colon = s.find(c);
-    if (colon == std::string::npos) {
-      return StringResult::Usage(std::string() + "Missing char " + c + " in option " + s);
-    }
-    // Add one to remove the char we were trimming until.
-    parsed_value = s.substr(colon + 1);
-    return StringResult::Success(parsed_value);
-  }
-
   static std::string RemovePrefix(const std::string& source) {
     size_t prefix_idx = source.find(":");
 
@@ -763,87 +705,64 @@
   }
 
  public:
-  Result ParseAndAppend(const std::string& option, TestProfilerOptions& existing) {
+  Result ParseAndAppend(const std::string& option, ProfileSaverOptions& existing) {
     // Special case which doesn't include a wildcard argument definition.
     // We pass-it through as-is.
-    if (option == "-Xenable-profiler") {
+    if (option == "-Xjitsaveprofilinginfo") {
       existing.enabled_ = true;
       return Result::SuccessNoValue();
     }
 
-    // The rest of these options are always the wildcard from '-Xprofile-*'
+    // The rest of these options are always the wildcard from '-Xps-*'
     std::string suffix = RemovePrefix(option);
 
-    if (StartsWith(option, "filename:")) {
-      CmdlineType<std::string> type_parser;
-
-      return ParseInto(existing,
-                       &TestProfilerOptions::output_file_name_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "period:")) {
+    if (StartsWith(option, "min-save-period-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::period_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "duration:")) {
+             &ProfileSaverOptions::min_save_period_ms_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "save-resolved-classes-delay-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::duration_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "interval:")) {
+             &ProfileSaverOptions::save_resolved_classes_delay_ms_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "startup-method-samples:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::interval_us_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "backoff:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::backoff_coefficient_,
-                                 type_parser.Parse(suffix),
-                                 1.0,
-                                 10.0);
-
-    } else if (option == "start-immediately") {
-      existing.start_immediately_ = true;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "top-k-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (StartsWith(option, "top-k-change-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_change_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (option == "type:method") {
-      existing.profile_type_ = kProfilerMethod;
-      return Result::SuccessNoValue();
-    } else if (option == "type:stack") {
-      existing.profile_type_ = kProfilerBoundedStack;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "max-stack-depth:")) {
+             &ProfileSaverOptions::startup_method_samples_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-methods-to-save:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::max_stack_depth_,
-                       type_parser.Parse(suffix));
+             &ProfileSaverOptions::min_methods_to_save_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-classes-to-save:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::min_classes_to_save_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::min_notification_before_wake_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "max-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::max_notification_before_wake_,
+             type_parser.Parse(suffix));
     } else {
       return Result::Failure(std::string("Invalid suboption '") + option + "'");
     }
   }
 
-  static const char* Name() { return "TestProfilerOptions"; }
+  static const char* Name() { return "ProfileSaverOptions"; }
   static constexpr bool kCanParseBlankless = true;
 };
 
@@ -852,8 +771,10 @@
   Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) {
     if (option == "none") {
       existing = ExperimentalFlags::kNone;
-    } else if (option == "lambdas") {
-      existing = existing | ExperimentalFlags::kLambdas;
+    } else if (option == "agents") {
+      existing = existing | ExperimentalFlags::kAgents;
+    } else if (option == "runtime-plugins") {
+      existing = existing | ExperimentalFlags::kRuntimePlugins;
     } else {
       return Result::Failure(std::string("Unknown option '") + option + "'");
     }
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 4b56804..84beff5 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -497,7 +497,7 @@
       std::function<void(TArg&)> save_argument_;
       std::function<TArg&(void)> load_argument_;
     };
-  } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2]
+  }  // namespace detail  // NOLINT [readability/namespace] [5]
 }  // namespace art
 
 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
diff --git a/cmdline/detail/cmdline_parser_detail.h b/cmdline/detail/cmdline_parser_detail.h
index 9b43bb0..24dbca2 100644
--- a/cmdline/detail/cmdline_parser_detail.h
+++ b/cmdline/detail/cmdline_parser_detail.h
@@ -35,7 +35,7 @@
      private:
       template <typename TStream, typename T>
       static std::true_type InsertionOperatorTest(TStream& os, const T& value,
-                                                  std::remove_reference<decltype(os << value)>* = 0); // NOLINT [whitespace/operators] [3]
+                                                  std::remove_reference<decltype(os << value)>* = 0);  // NOLINT [whitespace/operators] [3]
 
       template <typename TStream, typename ... T>
       static std::false_type InsertionOperatorTest(TStream& os, const T& ... args);
@@ -53,7 +53,7 @@
      private:
       template <typename TL, typename TR>
       static std::true_type EqualityOperatorTest(const TL& left, const TR& right,
-                                                 std::remove_reference<decltype(left == right)>* = 0); // NOLINT [whitespace/operators] [3]
+                                                 std::remove_reference<decltype(left == right)>* = 0);  // NOLINT [whitespace/operators] [3]
 
       template <typename TL, typename ... T>
       static std::false_type EqualityOperatorTest(const TL& left, const T& ... args);
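The two hunks above only adjust comment spacing, but the surrounding pattern is worth a note: the extra defaulted parameter of type std::remove_reference<decltype(os << value)>* makes the first overload drop out via SFINAE whenever the expression is ill-formed, leaving the variadic fallback. A compilable sketch of the same detection idiom, with illustrative names rather than the ART trait classes:

#include <iostream>
#include <type_traits>
#include <utility>

template <typename TStream, typename T>
std::true_type InsertionOperatorTest(TStream& os, const T& value,
                                     std::remove_reference<decltype(os << value)>* = 0);

template <typename TStream, typename... T>
std::false_type InsertionOperatorTest(TStream& os, const T&... args);

// True when "os << value" is a valid expression for T.
template <typename T>
struct SupportsInsertionOperator
    : decltype(InsertionOperatorTest(std::declval<std::ostream&>(), std::declval<const T&>())) {};

struct NotPrintable {};

int main() {
  std::cout << SupportsInsertionOperator<int>::value << "\n";           // 1
  std::cout << SupportsInsertionOperator<NotPrintable>::value << "\n";  // 0
}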
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e9c22d2..16c6a7b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -47,7 +47,6 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
-	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
@@ -61,13 +60,15 @@
 	optimizing/load_store_elimination.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
-	optimizing/nodes_arm64.cc \
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
+	optimizing/register_allocation_resolver.cc \
 	optimizing/register_allocator.cc \
+	optimizing/register_allocator_graph_color.cc \
+	optimizing/register_allocator_linear_scan.cc \
 	optimizing/select_generator.cc \
 	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
@@ -77,6 +78,7 @@
 	optimizing/stack_map_stream.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
+	utils/jni_macro_assembler.cc \
 	utils/swap_space.cc \
 	compiler.cc \
 	elf_writer.cc \
@@ -89,10 +91,14 @@
 	linker/arm/relative_patcher_arm_base.cc \
 	linker/arm/relative_patcher_thumb2.cc \
 	optimizing/code_generator_arm.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
+	optimizing/instruction_simplifier_arm.cc \
+	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm.cc \
 	utils/arm/assembler_arm.cc \
 	utils/arm/assembler_arm32.cc \
 	utils/arm/assembler_thumb2.cc \
+	utils/arm/jni_macro_assembler_arm.cc \
 	utils/arm/managed_register_arm.cc \
 
 # TODO We should really separate out those files that are actually needed for both variants of an
@@ -103,18 +109,22 @@
     $(LIBART_COMPILER_SRC_FILES_arm) \
 	jni/quick/arm64/calling_convention_arm64.cc \
 	linker/arm64/relative_patcher_arm64.cc \
+	optimizing/nodes_arm64.cc \
 	optimizing/code_generator_arm64.cc \
-	optimizing/instruction_simplifier_arm.cc \
 	optimizing/instruction_simplifier_arm64.cc \
 	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
+	utils/arm64/jni_macro_assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 
 LIBART_COMPILER_SRC_FILES_mips := \
 	jni/quick/mips/calling_convention_mips.cc \
+	linker/mips/relative_patcher_mips.cc \
 	optimizing/code_generator_mips.cc \
+	optimizing/dex_cache_array_fixups_mips.cc \
 	optimizing/intrinsics_mips.cc \
+	optimizing/pc_relative_fixups_mips.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 
@@ -134,7 +144,9 @@
 	optimizing/code_generator_x86.cc \
 	optimizing/intrinsics_x86.cc \
 	optimizing/pc_relative_fixups_x86.cc \
+	optimizing/x86_memory_gen.cc \
 	utils/x86/assembler_x86.cc \
+	utils/x86/jni_macro_assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
 LIBART_COMPILER_SRC_FILES_x86_64 := \
@@ -144,6 +156,7 @@
 	optimizing/intrinsics_x86_64.cc \
 	optimizing/code_generator_x86_64.cc \
 	utils/x86_64/assembler_x86_64.cc \
+	utils/x86_64/jni_macro_assembler_x86_64.cc \
 	utils/x86_64/managed_register_x86_64.cc \
 
 
@@ -151,7 +164,6 @@
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
   compiled_method.h \
-  dex/compiler_enums.h \
   dex/dex_to_dex_compiler.h \
   driver/compiler_driver.h \
   driver/compiler_options.h \
@@ -247,20 +259,21 @@
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     ifeq ($$(art_static_or_shared),static)
       LOCAL_LDFLAGS += -static
     endif
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
@@ -272,18 +285,18 @@
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-  # Vixl assembly support for ARM64 targets.
+  # VIXL assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLESTATIC_LIBRARIES += libvixl
+      LOCAL_WHOLESTATIC_LIBRARIES += libvixld-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl
+      LOCAL_SHARED_LIBRARIES += libvixld-arm64
     endif
   else
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl
+      LOCAL_SHARED_LIBRARIES += libvixl-arm64
     endif
   endif
 
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index f8b7460..c754e55 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -22,11 +22,13 @@
 #include <sstream>
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "debug/dwarf/dwarf_constants.h"
 #include "debug/dwarf/dwarf_test.h"
 #include "debug/dwarf/headers.h"
 #include "disassembler/disassembler.h"
 #include "gtest/gtest.h"
+#include "thread.h"
 
 namespace art {
 
@@ -57,7 +59,13 @@
     // Pretty-print assembly.
     const uint8_t* asm_base = actual_asm.data();
     const uint8_t* asm_end = asm_base + actual_asm.size();
-    auto* opts = new DisassemblerOptions(false, asm_base, asm_end, true);
+    auto* opts = new DisassemblerOptions(false,
+                                         asm_base,
+                                         asm_end,
+                                         true,
+                                         is64bit
+                                             ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                             : &Thread::DumpThreadOffset<PointerSize::k32>);
     std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
     std::stringstream stream;
     const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
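The DisassemblerOptions change above threads a thread-offset dumper into the options, chosen per bitness with a ternary over two template instantiations. A small sketch of that shape, assuming stand-in names for the ART Thread/DisassemblerOptions API:

#include <cstdint>
#include <iostream>

// Pick a differently-instantiated dumper and pass it on as a plain function pointer.
template <bool kIs64Bit>
void DumpThreadOffset(std::ostream& os, uint32_t offset) {
  os << "offset " << offset << (kIs64Bit ? " (64-bit thread layout)" : " (32-bit thread layout)");
}

struct DisasmOptions {
  void (*thread_offset_dumper)(std::ostream&, uint32_t);
};

int main() {
  bool is64bit = true;
  DisasmOptions opts{is64bit ? &DumpThreadOffset<true> : &DumpThreadOffset<false>};
  opts.thread_offset_dumper(std::cout, 128);
  std::cout << "\n";
}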
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index bf29e1c..06a39b2 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -19,6 +19,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "compiled_method.h"
 #include "dex/quick_compiler_callbacks.h"
@@ -115,7 +116,7 @@
   Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
-  size_t pointer_size = class_linker_->GetImagePointerSize();
+  PointerSize pointer_size = class_linker_->GetImagePointerSize();
   for (auto& m : klass->GetMethods(pointer_size)) {
     MakeExecutable(&m);
   }
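Many hunks in this change replace a raw size_t pointer size with the PointerSize type from base/enums.h. The point of the scoped enum is that a pointer size can no longer be confused with an arbitrary byte count, and any arithmetic needs an explicit cast. A minimal sketch with a local stand-in enum (not the base/enums.h definition itself):

#include <cstddef>
#include <iostream>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };

size_t MethodArrayBytes(size_t num_methods, PointerSize pointer_size) {
  // The cast makes the conversion from "pointer size" to "byte count" visible.
  return num_methods * static_cast<size_t>(pointer_size);
}

int main() {
  std::cout << MethodArrayBytes(3, PointerSize::k64) << "\n";  // 24
  // MethodArrayBytes(3, 8);  // Does not compile: 8 is not a PointerSize.
}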
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 2d139eb..c942375 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -115,13 +115,6 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When read barrier works with all tests, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER() \
-  if (kUseReadBarrier) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all Optimizing back ends, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 9479ff3..2a81804 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -174,6 +174,7 @@
     kCall,
     kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
     kType,
+    kTypeRelative,     // NOTE: Actual patching is instruction_set-dependent.
     kString,
     kStringRelative,   // NOTE: Actual patching is instruction_set-dependent.
     kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
@@ -215,6 +216,16 @@
     return patch;
   }
 
+  static LinkerPatch RelativeTypePatch(size_t literal_offset,
+                                       const DexFile* target_dex_file,
+                                       uint32_t pc_insn_offset,
+                                       uint32_t target_type_idx) {
+    LinkerPatch patch(literal_offset, Type::kTypeRelative, target_dex_file);
+    patch.type_idx_ = target_type_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch StringPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_string_idx) {
@@ -258,6 +269,7 @@
   bool IsPcRelative() const {
     switch (GetType()) {
       case Type::kCallRelative:
+      case Type::kTypeRelative:
       case Type::kStringRelative:
       case Type::kDexCacheArray:
         return true;
@@ -274,12 +286,12 @@
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == Type::kType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return target_dex_file_;
   }
 
   uint32_t TargetTypeIndex() const {
-    DCHECK(patch_type_ == Type::kType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return type_idx_;
   }
 
@@ -304,7 +316,9 @@
   }
 
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == Type::kStringRelative || patch_type_ == Type::kDexCacheArray);
+    DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kDexCacheArray);
     return pc_insn_offset_;
   }
 
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 487a27f..a955f3c 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -38,6 +38,11 @@
     kOptimizing
   };
 
+  enum JniOptimizationFlags {
+    kNone,
+    kFastNative,
+  };
+
   static Compiler* Create(CompilerDriver* driver, Kind kind);
 
   virtual void Init() = 0;
@@ -57,7 +62,8 @@
 
   virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
-                                     const DexFile& dex_file) const = 0;
+                                     const DexFile& dex_file,
+                                     JniOptimizationFlags optimization_flags) const = 0;
 
   virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
                           jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc
index 2ba3af5..866bf43 100644
--- a/compiler/debug/dwarf/dwarf_test.cc
+++ b/compiler/debug/dwarf/dwarf_test.cc
@@ -27,7 +27,7 @@
 namespace dwarf {
 
 // Run the tests only on host since we need objdump.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT;
 
@@ -341,7 +341,7 @@
   CheckObjdumpOutput(is64bit, "-W");
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
deleted file mode 100644
index 8800e4b..0000000
--- a/compiler/dex/compiler_enums.h
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_COMPILER_ENUMS_H_
-#define ART_COMPILER_DEX_COMPILER_ENUMS_H_
-
-#include "dex_instruction.h"
-
-namespace art {
-
-enum RegisterClass {
-  kInvalidRegClass,
-  kCoreReg,
-  kFPReg,
-  kRefReg,
-  kAnyReg,
-};
-std::ostream& operator<<(std::ostream& os, const RegisterClass& rhs);
-
-enum BitsUsed {
-  kSize32Bits,
-  kSize64Bits,
-  kSize128Bits,
-  kSize256Bits,
-  kSize512Bits,
-  kSize1024Bits,
-};
-std::ostream& operator<<(std::ostream& os, const BitsUsed& rhs);
-
-enum SpecialTargetRegister {
-  kSelf,            // Thread pointer.
-  kSuspend,         // Used to reduce suspend checks for some targets.
-  kLr,
-  kPc,
-  kSp,
-  kArg0,
-  kArg1,
-  kArg2,
-  kArg3,
-  kArg4,
-  kArg5,
-  kArg6,
-  kArg7,
-  kFArg0,
-  kFArg1,
-  kFArg2,
-  kFArg3,
-  kFArg4,
-  kFArg5,
-  kFArg6,
-  kFArg7,
-  kFArg8,
-  kFArg9,
-  kFArg10,
-  kFArg11,
-  kFArg12,
-  kFArg13,
-  kFArg14,
-  kFArg15,
-  kRet0,
-  kRet1,
-  kInvokeTgt,
-  kHiddenArg,
-  kHiddenFpArg,
-  kCount
-};
-std::ostream& operator<<(std::ostream& os, const SpecialTargetRegister& code);
-
-enum RegLocationType {
-  kLocDalvikFrame = 0,  // Normal Dalvik register
-  kLocPhysReg,
-  kLocCompilerTemp,
-  kLocInvalid
-};
-std::ostream& operator<<(std::ostream& os, const RegLocationType& rhs);
-
-enum BBType {
-  kNullBlock,
-  kEntryBlock,
-  kDalvikByteCode,
-  kExitBlock,
-  kExceptionHandling,
-  kDead,
-};
-std::ostream& operator<<(std::ostream& os, const BBType& code);
-
-// Shared pseudo opcodes - must be < 0.
-enum LIRPseudoOpcode {
-  kPseudoPrologueBegin = -18,
-  kPseudoPrologueEnd = -17,
-  kPseudoEpilogueBegin = -16,
-  kPseudoEpilogueEnd = -15,
-  kPseudoExportedPC = -14,
-  kPseudoSafepointPC = -13,
-  kPseudoIntrinsicRetry = -12,
-  kPseudoSuspendTarget = -11,
-  kPseudoThrowTarget = -10,
-  kPseudoCaseLabel = -9,
-  kPseudoBarrier = -8,
-  kPseudoEntryBlock = -7,
-  kPseudoExitBlock = -6,
-  kPseudoTargetLabel = -5,
-  kPseudoDalvikByteCodeBoundary = -4,
-  kPseudoPseudoAlign4 = -3,
-  kPseudoEHBlockLabel = -2,
-  kPseudoNormalBlockLabel = -1,
-};
-std::ostream& operator<<(std::ostream& os, const LIRPseudoOpcode& rhs);
-
-enum ExtendedMIROpcode {
-  kMirOpFirst = kNumPackedOpcodes,
-  kMirOpPhi = kMirOpFirst,
-
-  // @brief Copy from one VR to another.
-  // @details
-  // vA: destination VR
-  // vB: source VR
-  kMirOpCopy,
-
-  // @brief Used to do float comparison with less-than bias.
-  // @details Unlike cmpl-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmplFloat,
-
-  // @brief Used to do float comparison with greater-than bias.
-  // @details Unlike cmpg-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmpgFloat,
-
-  // @brief Used to do double comparison with less-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmplDouble,
-
-  // @brief Used to do double comparison with greater-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpgDouble,
-
-  // @brief Used to do comparison of 64-bit long integers.
-  // @details Unlike cmp-long, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpLong,
-
-  // @brief This represents no-op.
-  kMirOpNop,
-
-  // @brief Do a null check on the object register.
-  // @details The backends may implement this implicitly or explicitly. This MIR is guaranteed
-  // to have the correct offset as an exception thrower.
-  // vA: object register
-  kMirOpNullCheck,
-
-  kMirOpRangeCheck,
-  kMirOpDivZeroCheck,
-  kMirOpCheck,
-  kMirOpSelect,
-
-  // Vector opcodes:
-  // TypeSize is an encoded field giving the element type and the vector size.
-  // It is encoded as OpSize << 16 | (number of bits in vector)
-  //
-  // Destination and source are integers that will be interpreted by the
-  // backend that supports Vector operations.  Backends are permitted to support only
-  // certain vector register sizes.
-  //
-  // At this point, only two operand instructions are supported.  Three operand instructions
-  // could be supported by using a bit in TypeSize and arg[0] where needed.
-
-  // @brief MIR to move constant data to a vector register
-  // vA: destination
-  // vB: number of bits in register
-  // args[0]~args[3]: up to 128 bits of data for initialization
-  kMirOpConstVector,
-
-  // @brief MIR to move a vectorized register to another
-  // vA: destination
-  // vB: source
-  // vC: TypeSize
-  kMirOpMoveVector,
-
-  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedMultiply,
-
-  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAddition,
-
-  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedSubtract,
-
-  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedShiftLeft,
-
-  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedSignedShiftRight,
-
-  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedUnsignedShiftRight,
-
-  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAnd,
-
-  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedOr,
-
-  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedXor,
-
-  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-  // @details Instruction does a horizontal addition of the packed elements and then adds it to VR
-  // vA: destination and source VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  kMirOpPackedAddReduce,
-
-  // @brief Extract a packed element into a single VR.
-  // vA: destination VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  // arg[0]: The index to use for extraction from vector register (which packed element)
-  kMirOpPackedReduce,
-
-  // @brief Create a vector value, with all TypeSize values equal to vC
-  // vA: destination vector register
-  // vB: source VR (not vector register)
-  // vC: TypeSize
-  kMirOpPackedSet,
-
-  // @brief Reserve a range of vector registers.
-  // vA: Start vector register to reserve.
-  // vB: Inclusive end vector register to reserve.
-  // @note: The backend may choose to map vector numbers used in vector opcodes.
-  //  Reserved registers are removed from the list of backend temporary pool.
-  kMirOpReserveVectorRegisters,
-
-  // @brief Free a range of reserved vector registers
-  // vA: Start vector register to unreserve.
-  // vB: Inclusive end vector register to unreserve.
-  // @note: All currently reserved vector registers are returned to the temporary pool.
-  kMirOpReturnVectorRegisters,
-
-  // @brief Create a memory barrier.
-  // vA: a constant defined by enum MemBarrierKind.
-  kMirOpMemBarrier,
-
-  // @brief Used to fill a vector register with array values.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: destination vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayGet,
-
-  // @brief Used to store a vector register into array.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: source vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayPut,
-
-  // @brief Multiply-add integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddInt,
-
-  // @brief Multiply-subtract integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubInt,
-
-  // @brief Multiply-add long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddLong,
-
-  // @brief Multiply-subtract long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubLong,
-
-  kMirOpLast,
-};
-
-enum MIROptimizationFlagPositions {
-  kMIRIgnoreNullCheck = 0,
-  kMIRIgnoreRangeCheck,
-  kMIRIgnoreCheckCast,
-  kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
-  kMIRClassIsInitialized,
-  kMIRClassIsInDexCache,
-  kMirIgnoreDivZeroCheck,
-  kMIRInlined,                        // Invoke is inlined (ie dead).
-  kMIRInlinedPred,                    // Invoke is inlined via prediction.
-  kMIRCallee,                         // Instruction is inlined from callee.
-  kMIRIgnoreSuspendCheck,
-  kMIRDup,
-  kMIRMark,                           // Temporary node mark can be used by
-                                      // opt passes for their private needs.
-  kMIRStoreNonTemporal,
-  kMIRLastMIRFlag,
-};
-
-// For successor_block_list.
-enum BlockListType {
-  kNotUsed = 0,
-  kCatch,
-  kPackedSwitch,
-  kSparseSwitch,
-};
-std::ostream& operator<<(std::ostream& os, const BlockListType& rhs);
-
-enum AssemblerStatus {
-  kSuccess,
-  kRetryAll,
-};
-std::ostream& operator<<(std::ostream& os, const AssemblerStatus& rhs);
-
-enum OpSize {
-  kWord,            // Natural word size of target (32/64).
-  k32,
-  k64,
-  kReference,       // Object reference; compressed on 64-bit targets.
-  kSingle,
-  kDouble,
-  kUnsignedHalf,
-  kSignedHalf,
-  kUnsignedByte,
-  kSignedByte,
-};
-std::ostream& operator<<(std::ostream& os, const OpSize& kind);
-
-enum OpKind {
-  kOpMov,
-  kOpCmov,
-  kOpMvn,
-  kOpCmp,
-  kOpLsl,
-  kOpLsr,
-  kOpAsr,
-  kOpRor,
-  kOpNot,
-  kOpAnd,
-  kOpOr,
-  kOpXor,
-  kOpNeg,
-  kOpAdd,
-  kOpAdc,
-  kOpSub,
-  kOpSbc,
-  kOpRsub,
-  kOpMul,
-  kOpDiv,
-  kOpRem,
-  kOpBic,
-  kOpCmn,
-  kOpTst,
-  kOpRev,
-  kOpRevsh,
-  kOpBkpt,
-  kOpBlx,
-  kOpPush,
-  kOpPop,
-  kOp2Char,
-  kOp2Short,
-  kOp2Byte,
-  kOpCondBr,
-  kOpUncondBr,
-  kOpBx,
-  kOpInvalid,
-};
-std::ostream& operator<<(std::ostream& os, const OpKind& rhs);
-
-enum MoveType {
-  kMov8GP,      // Move 8-bit general purpose register.
-  kMov16GP,     // Move 16-bit general purpose register.
-  kMov32GP,     // Move 32-bit general purpose register.
-  kMov64GP,     // Move 64-bit general purpose register.
-  kMov32FP,     // Move 32-bit FP register.
-  kMov64FP,     // Move 64-bit FP register.
-  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
-  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
-  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
-  kMov128FP = kMovU128FP,
-  kMovA128FP,   // Move 128-bit FP register to/from region surely aligned to 16-bytes.
-  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
-  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
-};
-std::ostream& operator<<(std::ostream& os, const MoveType& kind);
-
-enum ConditionCode {
-  kCondEq,  // equal
-  kCondNe,  // not equal
-  kCondCs,  // carry set
-  kCondCc,  // carry clear
-  kCondUlt,  // unsigned less than
-  kCondUge,  // unsigned greater than or same
-  kCondMi,  // minus
-  kCondPl,  // plus, positive or zero
-  kCondVs,  // overflow
-  kCondVc,  // no overflow
-  kCondHi,  // unsigned greater than
-  kCondLs,  // unsigned lower or same
-  kCondGe,  // signed greater than or equal
-  kCondLt,  // signed less than
-  kCondGt,  // signed greater than
-  kCondLe,  // signed less than or equal
-  kCondAl,  // always
-  kCondNv,  // never
-};
-std::ostream& operator<<(std::ostream& os, const ConditionCode& kind);
-
-// Target specific condition encodings
-enum ArmConditionCode {
-  kArmCondEq = 0x0,  // 0000
-  kArmCondNe = 0x1,  // 0001
-  kArmCondCs = 0x2,  // 0010
-  kArmCondCc = 0x3,  // 0011
-  kArmCondMi = 0x4,  // 0100
-  kArmCondPl = 0x5,  // 0101
-  kArmCondVs = 0x6,  // 0110
-  kArmCondVc = 0x7,  // 0111
-  kArmCondHi = 0x8,  // 1000
-  kArmCondLs = 0x9,  // 1001
-  kArmCondGe = 0xa,  // 1010
-  kArmCondLt = 0xb,  // 1011
-  kArmCondGt = 0xc,  // 1100
-  kArmCondLe = 0xd,  // 1101
-  kArmCondAl = 0xe,  // 1110
-  kArmCondNv = 0xf,  // 1111
-};
-std::ostream& operator<<(std::ostream& os, const ArmConditionCode& kind);
-
-enum X86ConditionCode {
-  kX86CondO   = 0x0,    // overflow
-  kX86CondNo  = 0x1,    // not overflow
-
-  kX86CondB   = 0x2,    // below
-  kX86CondNae = kX86CondB,  // not-above-equal
-  kX86CondC   = kX86CondB,  // carry
-
-  kX86CondNb  = 0x3,    // not-below
-  kX86CondAe  = kX86CondNb,  // above-equal
-  kX86CondNc  = kX86CondNb,  // not-carry
-
-  kX86CondZ   = 0x4,    // zero
-  kX86CondEq  = kX86CondZ,  // equal
-
-  kX86CondNz  = 0x5,    // not-zero
-  kX86CondNe  = kX86CondNz,  // not-equal
-
-  kX86CondBe  = 0x6,    // below-equal
-  kX86CondNa  = kX86CondBe,  // not-above
-
-  kX86CondNbe = 0x7,    // not-below-equal
-  kX86CondA   = kX86CondNbe,  // above
-
-  kX86CondS   = 0x8,    // sign
-  kX86CondNs  = 0x9,    // not-sign
-
-  kX86CondP   = 0xa,    // 8-bit parity even
-  kX86CondPE  = kX86CondP,
-
-  kX86CondNp  = 0xb,    // 8-bit parity odd
-  kX86CondPo  = kX86CondNp,
-
-  kX86CondL   = 0xc,    // less-than
-  kX86CondNge = kX86CondL,  // not-greater-equal
-
-  kX86CondNl  = 0xd,    // not-less-than
-  kX86CondGe  = kX86CondNl,  // not-greater-equal
-
-  kX86CondLe  = 0xe,    // less-than-equal
-  kX86CondNg  = kX86CondLe,  // not-greater
-
-  kX86CondNle = 0xf,    // not-less-than
-  kX86CondG   = kX86CondNle,  // greater
-};
-std::ostream& operator<<(std::ostream& os, const X86ConditionCode& kind);
-
-enum DividePattern {
-  DivideNone,
-  Divide3,
-  Divide5,
-  Divide7,
-};
-std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-
-/**
- * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details We define the combined barrier types that are actually required
- * by the Java Memory Model, rather than using exactly the terminology from
- * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
- * primitives.  Note that the JSR-133 cookbook generally does not deal with
- * store atomicity issues, and the recipes there are not always entirely sufficient.
- * The current recipe is as follows:
- * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
- * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
- * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
- * -# Use StoreStore barrier after all stores but before return from any constructor whose
- *    class has final fields.
- * -# Use NTStoreStore to order non-temporal stores with respect to all later
- *    store-to-memory instructions.  Only generated together with non-temporal stores.
- */
-enum MemBarrierKind {
-  kAnyStore,
-  kLoadAny,
-  kStoreStore,
-  kAnyAny,
-  kNTStoreStore,
-  kLastBarrierKind = kNTStoreStore
-};
-std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
-
-enum OpFeatureFlags {
-  kIsBranch = 0,
-  kNoOperand,
-  kIsUnaryOp,
-  kIsBinaryOp,
-  kIsTertiaryOp,
-  kIsQuadOp,
-  kIsQuinOp,
-  kIsSextupleOp,
-  kIsIT,
-  kIsMoveOp,
-  kMemLoad,
-  kMemStore,
-  kMemVolatile,
-  kMemScaledx0,
-  kMemScaledx2,
-  kMemScaledx4,
-  kPCRelFixup,  // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
-  kRegDef0,
-  kRegDef1,
-  kRegDef2,
-  kRegDefA,
-  kRegDefD,
-  kRegDefFPCSList0,
-  kRegDefFPCSList2,
-  kRegDefList0,
-  kRegDefList1,
-  kRegDefList2,
-  kRegDefLR,
-  kRegDefSP,
-  kRegUse0,
-  kRegUse1,
-  kRegUse2,
-  kRegUse3,
-  kRegUse4,
-  kRegUseA,
-  kRegUseC,
-  kRegUseD,
-  kRegUseB,
-  kRegUseFPCSList0,
-  kRegUseFPCSList2,
-  kRegUseList0,
-  kRegUseList1,
-  kRegUseLR,
-  kRegUsePC,
-  kRegUseSP,
-  kSetsCCodes,
-  kUsesCCodes,
-  kUseFpStack,
-  kUseHi,
-  kUseLo,
-  kDefHi,
-  kDefLo
-};
-std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& rhs);
-
-enum SelectInstructionKind {
-  kSelectNone,
-  kSelectConst,
-  kSelectMove,
-  kSelectGoto
-};
-std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
-
-// LIR fixup kinds for Arm and X86.
-enum FixupKind {
-  kFixupNone,
-  kFixupLabel,             // For labels we just adjust the offset.
-  kFixupLoad,              // Mostly for immediates.
-  kFixupVLoad,             // FP load which *may* be pc-relative.
-  kFixupCBxZ,              // Cbz, Cbnz.
-  kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupCondBranch,        // Conditional branch
-  kFixupT1Branch,          // Thumb1 Unconditional branch
-  kFixupT2Branch,          // Thumb2 Unconditional branch
-  kFixupBlx1,              // Blx1 (start of Blx1/Blx2 pair).
-  kFixupBl1,               // Bl1 (start of Bl1/Bl2 pair).
-  kFixupAdr,               // Adr.
-  kFixupMovImmLST,         // kThumb2MovImm16LST.
-  kFixupMovImmHST,         // kThumb2MovImm16HST.
-  kFixupAlign4,            // Align to 4-byte boundary.
-  kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
-  kFixupSwitchTable,       // X86_64 packed switch table.
-};
-std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
-
-enum VolatileKind {
-  kNotVolatile,      // Load/Store is not volatile
-  kVolatile          // Load/Store is volatile
-};
-std::ostream& operator<<(std::ostream& os, const VolatileKind& kind);
-
-enum WideKind {
-  kNotWide,      // Non-wide view
-  kWide,         // Wide view
-  kRef           // Ref width
-};
-std::ostream& operator<<(std::ostream& os, const WideKind& kind);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4a98342..8d53dbf 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -606,13 +606,13 @@
     INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsOrdered)
+              (type_flags) | kIntrinsicFlagIsOrdered)
 
     UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
     UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
@@ -843,13 +843,14 @@
   }
 }
 
-uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index, size_t pointer_size) {
+uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index,
+                                                      PointerSize pointer_size) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
   if (it != inline_methods_.end() && (it->second.opcode == kInlineStringInit)) {
     uint32_t string_init_base_offset = Thread::QuickEntryPointOffsetWithSize(
               OFFSETOF_MEMBER(QuickEntryPoints, pNewEmptyString), pointer_size);
-    return string_init_base_offset + it->second.d.data * pointer_size;
+    return string_init_base_offset + it->second.d.data * static_cast<size_t>(pointer_size);
   }
   return 0;
 }
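The only change in the hunk above is wrapping the type_flags macro parameter in parentheses. That is standard macro hygiene: arguments are substituted textually, so an argument containing a lower-precedence operator silently changes meaning unless the parameter is parenthesized. A self-contained illustration with made-up macros:

#include <cstdio>

#define COMBINE_BAD(flags)  (flags | 0x8)
#define COMBINE_GOOD(flags) ((flags) | 0x8)

int main() {
  // The argument below is pasted textually into the macro body.
  std::printf("%d\n", COMBINE_BAD(1 ? 2 : 4));   // Expands to (1 ? 2 : 4 | 0x8) == 2.
  std::printf("%d\n", COMBINE_GOOD(1 ? 2 : 4));  // Expands to ((1 ? 2 : 4) | 0x8) == 10.
}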
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index fbe403f..dbdfa24 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -18,10 +18,11 @@
 #define ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
 
 #include <stdint.h>
+
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "safe_map.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "quick/inline_method_analyser.h"
 
@@ -31,6 +32,13 @@
 class MethodVerifier;
 }  // namespace verifier
 
+enum OpSize {
+  k32,
+  k64,
+  kSignedHalf,
+  kSignedByte,
+};
+
 /**
  * Handles inlining of methods from a particular DexFile.
  *
@@ -76,7 +84,7 @@
     /**
      * Gets the thread pointer entrypoint offset for a string init method index and pointer size.
      */
-    uint32_t GetOffsetForStringInit(uint32_t method_index, size_t pointer_size)
+    uint32_t GetOffsetForStringInit(uint32_t method_index, PointerSize pointer_size)
         REQUIRES(!lock_);
 
     /**
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index bace014..4bcd59a 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "dex_file.h"
@@ -169,7 +170,7 @@
       continue;
     }
     auto* cl = Runtime::Current()->GetClassLinker();
-    size_t pointer_size = cl->GetImagePointerSize();
+    PointerSize pointer_size = cl->GetImagePointerSize();
     ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
         is_range ? inst->VRegB_3rc() : inst->VRegB_35c(), pointer_size);
     if (abstract_method == nullptr) {
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 94f5acc..3a260f5 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -21,6 +21,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "dex_compilation_unit.h"
 #include "mirror/class_loader.h"
@@ -336,7 +337,7 @@
                                      methods_declaring_class->IsFinal());
   // For invoke-super, ensure the vtable index will be correct to dispatch in the vtable of
   // the super class.
-  const size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  const PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
   // TODO We should be able to sharpen if we are going into the boot image as well.
   bool can_sharpen_super_based_on_type = same_dex_file &&
       (*invoke_type == kSuper) &&
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 5cde93c..758cd93 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -27,6 +27,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/bit_vector.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -47,7 +48,6 @@
 #include "driver/compiler_options.h"
 #include "jni_internal.h"
 #include "object_lock.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -77,10 +77,6 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
-// given, too all compilations.
-static constexpr bool kRestrictCompilationFiltersToImage = true;
-
 // Print additional info during profile guided compilation.
 static constexpr bool kDebugProfileGuidedCompilation = false;
 
@@ -395,7 +391,7 @@
       dump_passes_(dump_passes),
       timings_logger_(timer),
       compiler_context_(nullptr),
-      support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
+      support_boot_image_fixup_(instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
@@ -435,10 +431,10 @@
 #define CREATE_TRAMPOLINE(type, abi, offset) \
     if (Is64BitInstructionSet(instruction_set_)) { \
       return CreateTrampoline64(instruction_set_, abi, \
-                                type ## _ENTRYPOINT_OFFSET(8, offset)); \
+                                type ## _ENTRYPOINT_OFFSET(PointerSize::k64, offset)); \
     } else { \
       return CreateTrampoline32(instruction_set_, abi, \
-                                type ## _ENTRYPOINT_OFFSET(4, offset)); \
+                                type ## _ENTRYPOINT_OFFSET(PointerSize::k32, offset)); \
     }
 
 std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const {
@@ -603,7 +599,38 @@
         InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
       // Leaving this empty will trigger the generic JNI version
     } else {
-      compiled_method = driver->GetCompiler()->JniCompile(access_flags, method_idx, dex_file);
+      // Look-up the ArtMethod associated with this code_item (if any)
+      // -- It is later used to lookup any [optimization] annotations for this method.
+      ScopedObjectAccess soa(self);
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ClassLoader> class_loader_handle(hs.NewHandle(
+          soa.Decode<mirror::ClassLoader*>(class_loader)));
+
+      // TODO: Lookup annotation from DexFile directly without resolving method.
+      ArtMethod* method =
+          Runtime::Current()->GetClassLinker()->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+              dex_file,
+              method_idx,
+              dex_cache,
+              class_loader_handle,
+              /* referrer */ nullptr,
+              invoke_type);
+
+      bool fast_native = false;
+      if (LIKELY(method != nullptr)) {
+        fast_native = method->IsAnnotatedWithFastNative();
+      } else {
+        // Failed method resolutions happen very rarely, e.g. ancestor class cannot be resolved.
+        DCHECK(self->IsExceptionPending());
+        self->ClearException();
+      }
+
+      Compiler::JniOptimizationFlags optimization_flags =
+          fast_native ? Compiler::kFastNative : Compiler::kNone;
+      compiled_method = driver->GetCompiler()->JniCompile(access_flags,
+                                                          method_idx,
+                                                          dex_file,
+                                                          optimization_flags);
       CHECK(compiled_method != nullptr);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
@@ -946,10 +973,6 @@
 }
 
 bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
-  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
-    return true;
-  }
-
   if (classes_to_compile_ == nullptr) {
     return true;
   }
@@ -957,10 +980,6 @@
 }
 
 bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const {
-  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
-    return true;
-  }
-
   if (methods_to_compile_ == nullptr) {
     return true;
   }
@@ -1003,7 +1022,7 @@
 
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
-  ResolveCatchBlockExceptionsClassVisitor(
+  explicit ResolveCatchBlockExceptionsClassVisitor(
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
@@ -1016,7 +1035,7 @@
   }
 
  private:
-  void ResolveExceptionsForMethod(ArtMethod* method_handle, size_t pointer_size)
+  void ResolveExceptionsForMethod(ArtMethod* method_handle, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
     if (code_item == nullptr) {
@@ -1148,7 +1167,7 @@
   // Make a copy of the handle so that we don't clobber it doing Assign.
   MutableHandle<mirror::Class> klass(hs.NewHandle(c.Get()));
   std::string temp;
-  const size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   while (!klass->IsObjectClass()) {
     const char* descriptor = klass->GetDescriptor(&temp);
     std::pair<std::unordered_set<std::string>::iterator, bool> result =
@@ -2886,7 +2905,7 @@
 
 bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset) {
   DexFileMethodInliner* inliner = GetMethodInlinerMap()->GetMethodInliner(dex_file);
-  size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  const PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
   *offset = inliner->GetOffsetForStringInit(method_index, pointer_size);
   return inliner->IsStringInitMethodIndex(method_index);
 }
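The JNI compilation path above now resolves the method, asks whether it is annotated as fast-native, and forwards that as an explicit Compiler::JniOptimizationFlags value, falling back to the default stub when resolution fails. A minimal sketch of that decision, using stand-in classes rather than the ART ClassLinker/Compiler API:

#include <iostream>

enum JniOptimizationFlags { kNone, kFastNative };

struct Method {
  bool annotated_fast_native;
  bool IsAnnotatedWithFastNative() const { return annotated_fast_native; }
};

JniOptimizationFlags FlagsFor(const Method* method) {
  // Resolution can fail (e.g. an ancestor class is missing); use the default,
  // unoptimized JNI path in that case.
  bool fast_native = (method != nullptr) && method->IsAnnotatedWithFastNative();
  return fast_native ? kFastNative : kNone;
}

int main() {
  Method m{true};
  std::cout << (FlagsFor(&m) == kFastNative) << "\n";      // 1
  std::cout << (FlagsFor(nullptr) == kFastNative) << "\n"; // 0
}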
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 1bd4c3a..30ba8c9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 CompilerOptions::CompilerOptions()
-    : compiler_filter_(kDefaultCompilerFilter),
+    : compiler_filter_(CompilerFilter::kDefaultCompilerFilter),
       huge_method_threshold_(kDefaultHugeMethodThreshold),
       large_method_threshold_(kDefaultLargeMethodThreshold),
       small_method_threshold_(kDefaultSmallMethodThreshold),
@@ -44,7 +44,9 @@
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
       dump_cfg_append_(false),
-      force_determinism_(false) {
+      force_determinism_(false),
+      register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault),
+      passes_to_run_(nullptr) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -74,7 +76,9 @@
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
                                  bool dump_cfg_append,
-                                 bool force_determinism
+                                 bool force_determinism,
+                                 RegisterAllocator::Strategy regalloc_strategy,
+                                 const std::vector<std::string>* passes_to_run
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -99,7 +103,9 @@
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
     dump_cfg_append_(dump_cfg_append),
-    force_determinism_(force_determinism) {
+    force_determinism_(force_determinism),
+    register_allocation_strategy_(regalloc_strategy),
+    passes_to_run_(passes_to_run) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -144,6 +150,19 @@
   }
 }
 
+void CompilerOptions::ParseRegisterAllocationStrategy(const StringPiece& option,
+                                                      UsageFn Usage) {
+  DCHECK(option.starts_with("--register-allocation-strategy="));
+  StringPiece choice = option.substr(strlen("--register-allocation-strategy=")).data();
+  if (choice == "linear-scan") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
+  } else if (choice == "graph-color") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+  } else {
+    Usage("Unrecognized register allocation strategy. Try linear-scan, or graph-color.");
+  }
+}
+
 bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usage) {
   if (option.starts_with("--compiler-filter=")) {
     const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
@@ -190,6 +209,8 @@
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
   } else if (option.starts_with("--dump-cfg-append")) {
     dump_cfg_append_ = true;
+  } else if (option.starts_with("--register-allocation-strategy=")) {
+    ParseRegisterAllocationStrategy(option, Usage);
   } else {
     // Option not recognized.
     return false;
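The new --register-allocation-strategy= handling above strips a known prefix, maps the remaining choice onto a strategy enum, and rejects anything else. A compilable sketch of the same shape; the enum and function names are simplified stand-ins, and error reporting goes through a plain bool instead of the real Usage() callback:

#include <iostream>
#include <string>

enum class Strategy { kLinearScan, kGraphColor };

bool ParseRegAllocStrategy(const std::string& option, Strategy* out) {
  const std::string kPrefix = "--register-allocation-strategy=";
  if (option.compare(0, kPrefix.size(), kPrefix) != 0) {
    return false;  // Not this option at all.
  }
  std::string choice = option.substr(kPrefix.size());
  if (choice == "linear-scan") {
    *out = Strategy::kLinearScan;
  } else if (choice == "graph-color") {
    *out = Strategy::kGraphColor;
  } else {
    return false;  // The real parser calls Usage() with an error message here.
  }
  return true;
}

int main() {
  Strategy s;
  std::cout << ParseRegAllocStrategy("--register-allocation-strategy=graph-color", &s) << "\n";  // 1
  std::cout << (s == Strategy::kGraphColor) << "\n";                                             // 1
}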
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6d4455e..abc58d7 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -24,6 +24,7 @@
 #include "base/macros.h"
 #include "compiler_filter.h"
 #include "globals.h"
+#include "optimizing/register_allocator.h"
 #include "utils.h"
 
 namespace art {
@@ -31,7 +32,6 @@
 class CompilerOptions FINAL {
  public:
   // Guide heuristics to determine whether to compile method if profile data not available.
-  static const CompilerFilter::Filter kDefaultCompilerFilter = CompilerFilter::kSpeed;
   static const size_t kDefaultHugeMethodThreshold = 10000;
   static const size_t kDefaultLargeMethodThreshold = 600;
   static const size_t kDefaultSmallMethodThreshold = 60;
@@ -75,7 +75,9 @@
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
                   bool dump_cfg_append,
-                  bool force_determinism);
+                  bool force_determinism,
+                  RegisterAllocator::Strategy regalloc_strategy,
+                  const std::vector<std::string>* passes_to_run);
 
   CompilerFilter::Filter GetCompilerFilter() const {
     return compiler_filter_;
@@ -245,6 +247,14 @@
     return force_determinism_;
   }
 
+  RegisterAllocator::Strategy GetRegisterAllocationStrategy() const {
+    return register_allocation_strategy_;
+  }
+
+  const std::vector<std::string>* GetPassesToRun() const {
+    return passes_to_run_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -255,6 +265,7 @@
   void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseRegisterAllocationStrategy(const StringPiece& option, UsageFn Usage);
 
   CompilerFilter::Filter compiler_filter_;
   size_t huge_method_threshold_;
@@ -298,6 +309,16 @@
   // outcomes.
   bool force_determinism_;
 
+  RegisterAllocator::Strategy register_allocation_strategy_;
+
+  // If not null, specifies optimization passes which will be run instead of defaults.
+  // Note that passes_to_run_ is not checked for correctness and providing an incorrect
+  // list of passes can lead to unexpected compiler behaviour. This is caused by dependencies
+  // between passes. Failing to satisfy them can for example lead to compiler crashes.
+  // Passing pass names which are not recognized by the compiler will result in
+  // compiler-dependant behavior.
+  const std::vector<std::string>* passes_to_run_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 26ab281..7f2e193 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -780,9 +780,9 @@
                               EF_MIPS_PIC       |
                               EF_MIPS_CPIC      |
                               EF_MIPS_ABI_O32   |
-                              features->AsMipsInstructionSetFeatures()->IsR6()
-                                  ? EF_MIPS_ARCH_32R6
-                                  : EF_MIPS_ARCH_32R2);
+                              (features->AsMipsInstructionSetFeatures()->IsR6()
+                                   ? EF_MIPS_ARCH_32R6
+                                   : EF_MIPS_ARCH_32R2));
         break;
       }
       case kMips64: {
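The elf_builder.h hunk only adds parentheses, but they matter: '|' binds tighter than '?:', so the old expression OR'ed the flags with the IsR6() condition itself instead of with the selected architecture constant. A small illustration with made-up constants:

#include <cstdio>

int main() {
  const unsigned kBase = 0x10;  // Stands in for the accumulated e_flags bits.
  const unsigned kR6   = 0x1;   // Stands in for EF_MIPS_ARCH_32R6.
  const unsigned kR2   = 0x2;   // Stands in for EF_MIPS_ARCH_32R2.
  bool is_r6 = false;

  unsigned buggy = kBase | is_r6 ? kR6 : kR2;    // Parsed as (kBase | is_r6) ? kR6 : kR2.
  unsigned fixed = kBase | (is_r6 ? kR6 : kR2);  // Selects the constant first, then ORs.

  std::printf("0x%x 0x%x\n", buggy, fixed);      // Prints 0x1 0x12.
}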
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index 38ac052..86f91c5 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -17,6 +17,7 @@
 #include <memory>
 
 #include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -100,11 +101,11 @@
       CHECK_ALIGNED(stack_maps_offset, 2);
     }
 
-    method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
+    method_f_ = my_klass_->FindVirtualMethod("f", "()I", kRuntimePointerSize);
     ASSERT_TRUE(method_f_ != nullptr);
     method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
 
-    method_g_ = my_klass_->FindVirtualMethod("g", "(I)V", sizeof(void*));
+    method_g_ = my_klass_->FindVirtualMethod("g", "(I)V", kRuntimePointerSize);
     ASSERT_TRUE(method_g_ != nullptr);
     method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
   }
@@ -169,7 +170,7 @@
   Runtime* r = Runtime::Current();
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
-  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
+  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves);
   QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
 
   ASSERT_EQ(kStackAlignment, 16U);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 063eb11..bb45999 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -39,6 +39,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -51,6 +52,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
@@ -1372,11 +1374,14 @@
   image_methods_[ImageHeader::kResolutionMethod] = runtime->GetResolutionMethod();
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
   image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod();
-  image_methods_[ImageHeader::kCalleeSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kSaveAll);
-  image_methods_[ImageHeader::kRefsOnlySaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+  image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves);
+  image_methods_[ImageHeader::kSaveRefsOnlyMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly);
+  image_methods_[ImageHeader::kSaveRefsAndArgsMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
+  image_methods_[ImageHeader::kSaveEverythingMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
   // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
@@ -1414,9 +1419,12 @@
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
+        case kBinDexCacheArray:
+          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment());
+          break;
         case kBinImTable:
         case kBinIMTConflictTable: {
-          bin_offset = RoundUp(bin_offset, target_ptr_size_);
+          bin_offset = RoundUp(bin_offset, static_cast<size_t>(target_ptr_size_));
           break;
         }
         default: {
@@ -1573,7 +1581,7 @@
                                                boot_image_end - boot_image_begin,
                                                boot_oat_begin,
                                                boot_oat_end - boot_oat_begin,
-                                               target_ptr_size_,
+                                               static_cast<uint32_t>(target_ptr_size_),
                                                compile_pic_,
                                                /*is_pic*/compile_app_image_,
                                                image_storage_mode_,
@@ -1823,6 +1831,11 @@
   const auto it = saved_hashcode_map_.find(obj);
   dst->SetLockWord(it != saved_hashcode_map_.end() ?
       LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non immune objects as gray.
+    CHECK(dst->AtomicSetMarkBit(0, 1));
+  }
   FixupObject(obj, dst);
 }
 
@@ -2025,11 +2038,11 @@
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-  GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
+  mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
                                                NativeLocationInImage(orig_strings),
-                                               /*pointer size*/8u);
+                                               PointerSize::k64);
     orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
                                  ImageAddressVisitor(this));
   }
@@ -2037,7 +2050,7 @@
   if (orig_types != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
                                                NativeLocationInImage(orig_types),
-                                               /*pointer size*/8u);
+                                               PointerSize::k64);
     orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
                                        ImageAddressVisitor(this));
   }
@@ -2045,7 +2058,7 @@
   if (orig_methods != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
                                                NativeLocationInImage(orig_methods),
-                                               /*pointer size*/8u);
+                                               PointerSize::k64);
     ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
@@ -2058,7 +2071,7 @@
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
                                                NativeLocationInImage(orig_fields),
-                                               /*pointer size*/8u);
+                                               PointerSize::k64);
     ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
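
The dex cache fixups above now pass PointerSize::k64 instead of a raw 8u, and target_ptr_size_ becomes a PointerSize rather than a size_t, so every arithmetic use needs an explicit cast. A minimal sketch of that pattern, assuming PointerSize is an enum class whose enumerators are the byte widths (as suggested by the new base/enums.h include):

#include <cstddef>

// Assumed shape of the pointer-size type; values are the byte widths.
enum class PointerSize : size_t {
  k32 = 4,
  k64 = 8,
};

// Power-of-two round-up, as used for bin offsets in the image writer.
constexpr size_t RoundUp(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

// The strongly typed enum is what forces the explicit static_cast seen in the patch.
constexpr size_t AlignToPointer(size_t offset, PointerSize target_ptr_size) {
  return RoundUp(offset, static_cast<size_t>(target_ptr_size));
}

static_assert(AlignToPointer(13, PointerSize::k64) == 16, "64-bit alignment");
static_assert(AlignToPointer(13, PointerSize::k32) == 16, "32-bit alignment");
static_assert(AlignToPointer(17, PointerSize::k32) == 20, "32-bit alignment");
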
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 1efdc22..7d13656 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -28,6 +28,7 @@
 
 #include "base/bit_utils.h"
 #include "base/dchecked_vector.h"
+#include "base/enums.h"
 #include "base/length_prefixed_array.h"
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
@@ -216,8 +217,7 @@
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
   // failures due to invalid read barrier bits during object field reads.
-  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits -
-      LockWord::kReadBarrierStateSize;
+  static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize;
   // 111000.....0
   static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
 
@@ -524,7 +524,7 @@
   const bool compile_app_image_;
 
   // Size of pointers on the target architecture.
-  size_t target_ptr_size_;
+  PointerSize target_ptr_size_;
 
   // Image data indexed by the oat file index.
   dchecked_vector<ImageInfo> image_infos_;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index c5aefc2..6f6a8f5 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -32,6 +32,7 @@
 #include "oat_file-inl.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
+#include "optimizing/register_allocator.h"
 #include "thread_list.h"
 
 namespace art {
@@ -88,7 +89,7 @@
 
 JitCompiler::JitCompiler() {
   compiler_options_.reset(new CompilerOptions(
-      CompilerOptions::kDefaultCompilerFilter,
+      CompilerFilter::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
       CompilerOptions::kDefaultLargeMethodThreshold,
       CompilerOptions::kDefaultSmallMethodThreshold,
@@ -110,7 +111,9 @@
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
       /* dump_cfg_append */ false,
-      /* force_determinism */ false));
+      /* force_determinism */ false,
+      RegisterAllocator::kRegisterAllocatorDefault,
+      /* passes_to_run */ nullptr));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -172,7 +175,7 @@
 
   size_t thread_count = compiler_driver_->GetThreadCount();
   if (compiler_options_->GetGenerateDebugInfo()) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     const char* prefix = "/data/misc/trace";
 #else
     const char* prefix = "/tmp";
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 05c85e0..4b056f5 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -19,26 +19,42 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 #include "jni/jni_cfi_test_expected.inc"
 
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class JNICFITest : public CFITest {
  public:
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
                 const std::vector<uint8_t>& expected_asm,
                 const std::vector<uint8_t>& expected_cfi) {
+    if (Is64BitInstructionSet(isa)) {
+      TestImplSized<PointerSize::k64>(isa, isa_str, expected_asm, expected_cfi);
+    } else {
+      TestImplSized<PointerSize::k32>(isa, isa_str, expected_asm, expected_cfi);
+    }
+  }
+
+ private:
+  template <PointerSize kPointerSize>
+  void TestImplSized(InstructionSet isa,
+                     const char* isa_str,
+                     const std::vector<uint8_t>& expected_asm,
+                     const std::vector<uint8_t>& expected_cfi) {
     // Description of simple method.
     const bool is_static = true;
     const bool is_synchronized = false;
@@ -52,10 +68,11 @@
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
         ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
-    const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+    ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
+    std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm(
+        JNIMacroAssembler<kPointerSize>::Create(&arena, isa));
     jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
@@ -87,13 +104,25 @@
     TestImpl(isa, #isa, expected_asm, expected_cfi); \
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
 TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
 TEST_ISA(kMips64)
+#endif
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
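
The new TestImpl wrapper above selects a JNIMacroAssembler<PointerSize> instantiation from the instruction set at run time. A small self-contained sketch of the same dispatch-to-template pattern (the PointerSize enum and the function names here are stand-ins, not the test's own API):

#include <cstddef>
#include <iostream>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };

// Work that depends on the target pointer width at compile time,
// standing in for TestImplSized<kPointerSize>.
template <PointerSize kPointerSize>
void RunSized(const char* isa_str) {
  std::cout << isa_str << ": " << static_cast<size_t>(kPointerSize) << "-byte pointers\n";
}

// The runtime check picks the instantiation, mirroring TestImpl -> TestImplSized.
void Run(bool is_64_bit, const char* isa_str) {
  if (is_64_bit) {
    RunSized<PointerSize::k64>(isa_str);
  } else {
    RunSized<PointerSize::k32>(isa_str);
  }
}

int main() {
  Run(/* is_64_bit */ true, "kArm64");
  Run(/* is_64_bit */ false, "kThumb2");
  return 0;
}
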
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 16b4386..da72c75 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -1,8 +1,7 @@
 static constexpr uint8_t expected_asm_kThumb2[] = {
     0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
-    0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
-    0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
-    0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+    0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0,
+    0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
 };
 static constexpr uint8_t expected_cfi_kThumb2[] = {
     0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
@@ -11,7 +10,7 @@
     0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
     0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
     0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
-    0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+    0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
     0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
     0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
     0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
@@ -47,38 +46,38 @@
 // 0x00000008: sub sp, sp, #36
 // 0x0000000a: .cfi_def_cfa_offset: 128
 // 0x0000000a: str r0, [sp, #0]
-// 0x0000000c: str.w r1, [sp, #132]
-// 0x00000010: vstr.f32 s0, [sp, #136]
-// 0x00000014: str.w r2, [sp, #140]
-// 0x00000018: str.w r3, [sp, #144]
-// 0x0000001c: sub sp, sp, #32
-// 0x0000001e: .cfi_def_cfa_offset: 160
-// 0x0000001e: add sp, sp, #32
-// 0x00000020: .cfi_def_cfa_offset: 128
-// 0x00000020: .cfi_remember_state
-// 0x00000020: add sp, sp, #36
-// 0x00000022: .cfi_def_cfa_offset: 92
-// 0x00000022: vpop.f32 {s16-s31}
-// 0x00000026: .cfi_def_cfa_offset: 28
-// 0x00000026: .cfi_restore_extended: r80
-// 0x00000026: .cfi_restore_extended: r81
-// 0x00000026: .cfi_restore_extended: r82
-// 0x00000026: .cfi_restore_extended: r83
-// 0x00000026: .cfi_restore_extended: r84
-// 0x00000026: .cfi_restore_extended: r85
-// 0x00000026: .cfi_restore_extended: r86
-// 0x00000026: .cfi_restore_extended: r87
-// 0x00000026: .cfi_restore_extended: r88
-// 0x00000026: .cfi_restore_extended: r89
-// 0x00000026: .cfi_restore_extended: r90
-// 0x00000026: .cfi_restore_extended: r91
-// 0x00000026: .cfi_restore_extended: r92
-// 0x00000026: .cfi_restore_extended: r93
-// 0x00000026: .cfi_restore_extended: r94
-// 0x00000026: .cfi_restore_extended: r95
-// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
-// 0x0000002a: .cfi_restore_state
-// 0x0000002a: .cfi_def_cfa_offset: 128
+// 0x0000000c: str r1, [sp, #132]
+// 0x0000000e: vstr.f32 s0, [sp, #136]
+// 0x00000012: str r2, [sp, #140]
+// 0x00000014: str r3, [sp, #144]
+// 0x00000016: sub sp, sp, #32
+// 0x00000018: .cfi_def_cfa_offset: 160
+// 0x00000018: add sp, sp, #32
+// 0x0000001a: .cfi_def_cfa_offset: 128
+// 0x0000001a: .cfi_remember_state
+// 0x0000001a: add sp, sp, #36
+// 0x0000001c: .cfi_def_cfa_offset: 92
+// 0x0000001c: vpop.f32 {s16-s31}
+// 0x00000020: .cfi_def_cfa_offset: 28
+// 0x00000020: .cfi_restore_extended: r80
+// 0x00000020: .cfi_restore_extended: r81
+// 0x00000020: .cfi_restore_extended: r82
+// 0x00000020: .cfi_restore_extended: r83
+// 0x00000020: .cfi_restore_extended: r84
+// 0x00000020: .cfi_restore_extended: r85
+// 0x00000020: .cfi_restore_extended: r86
+// 0x00000020: .cfi_restore_extended: r87
+// 0x00000020: .cfi_restore_extended: r88
+// 0x00000020: .cfi_restore_extended: r89
+// 0x00000020: .cfi_restore_extended: r90
+// 0x00000020: .cfi_restore_extended: r91
+// 0x00000020: .cfi_restore_extended: r92
+// 0x00000020: .cfi_restore_extended: r93
+// 0x00000020: .cfi_restore_extended: r94
+// 0x00000020: .cfi_restore_extended: r95
+// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 128
 
 static constexpr uint8_t expected_asm_kArm64[] = {
     0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index c4c2399..b83985a 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -175,6 +175,9 @@
   void StackArgsMixedImpl();
   void StackArgsSignExtendedMips64Impl();
 
+  void NormalNativeImpl();
+  void FastNativeImpl();
+
   JNIEnv* env_;
   jstring library_search_path_;
   jmethodID jmethod_;
@@ -1772,4 +1775,44 @@
 
 JNI_TEST(StackArgsSignExtendedMips64)
 
+void Java_MyClassNatives_normalNative(JNIEnv*, jclass) {
+  // Intentionally left empty.
+}
+
+// Methods not annotated with anything are not considered "fast native"
+// -- Check that the annotation lookup does not find it.
+void JniCompilerTest::NormalNativeImpl() {
+  SetUpForTest(/* direct */ true,
+               "normalNative",
+               "()V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_normalNative));
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ASSERT_TRUE(method != nullptr);
+
+  EXPECT_FALSE(method->IsAnnotatedWithFastNative());
+}
+JNI_TEST(NormalNative)
+
+// Methods annotated with @FastNative are considered "fast native"
+// -- Check that the annotation lookup succeeds.
+void Java_MyClassNatives_fastNative(JNIEnv*, jclass) {
+  // Intentionally left empty.
+}
+
+void JniCompilerTest::FastNativeImpl() {
+  SetUpForTest(/* direct */ true,
+               "fastNative",
+               "()V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_fastNative));
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ASSERT_TRUE(method != nullptr);
+
+  EXPECT_TRUE(method->IsAnnotatedWithFastNative());
+}
+JNI_TEST(FastNative)
+
 }  // namespace art
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 9d2732a..0d16260 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm {
 
+static_assert(kArmPointerSize == PointerSize::k32, "Unexpected ARM pointer size");
+
 // Used by hard float.
 static const Register kHFCoreArgumentRegisters[] = {
   R0, R1, R2, R3
@@ -31,10 +33,6 @@
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
 
-static const SRegister kHFSCalleeSaveRegisters[] = {
-  S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
-};
-
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
@@ -42,6 +40,57 @@
 static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
     "ks d argument registers mismatch");
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    ArmManagedRegister::FromCoreRegister(R5),
+    ArmManagedRegister::FromCoreRegister(R6),
+    ArmManagedRegister::FromCoreRegister(R7),
+    ArmManagedRegister::FromCoreRegister(R8),
+    ArmManagedRegister::FromCoreRegister(R10),
+    ArmManagedRegister::FromCoreRegister(R11),
+    // Hard float registers.
+    ArmManagedRegister::FromSRegister(S16),
+    ArmManagedRegister::FromSRegister(S17),
+    ArmManagedRegister::FromSRegister(S18),
+    ArmManagedRegister::FromSRegister(S19),
+    ArmManagedRegister::FromSRegister(S20),
+    ArmManagedRegister::FromSRegister(S21),
+    ArmManagedRegister::FromSRegister(S22),
+    ArmManagedRegister::FromSRegister(S23),
+    ArmManagedRegister::FromSRegister(S24),
+    ArmManagedRegister::FromSRegister(S25),
+    ArmManagedRegister::FromSRegister(S26),
+    ArmManagedRegister::FromSRegister(S27),
+    ArmManagedRegister::FromSRegister(S28),
+    ArmManagedRegister::FromSRegister(S29),
+    ArmManagedRegister::FromSRegister(S30),
+    ArmManagedRegister::FromSRegister(S31)
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // LR is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << LR;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsCoreRegister()) {
+      result |= (1 << r.AsArm().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsSRegister()) {
+      result |= (1 << r.AsArm().AsSRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -208,7 +257,7 @@
 
 ArmJniCallingConvention::ArmJniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
+    : JniCallingConvention(is_static, is_synchronized, shorty, kArmPointerSize) {
   // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
   // or jclass for static methods and the JNIEnv. We start at the aligned register r2.
   size_t padding = 0;
@@ -223,32 +272,15 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R5));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R6));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R7));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R8));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R10));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R11));
-
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(kHFSCalleeSaveRegisters[i]));
-  }
 }
 
 uint32_t ArmJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << R5 | 1 << R6 | 1 << R7 | 1 << R8 | 1 << R10 | 1 << R11 | 1 << LR;
-  return result;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t ArmJniCallingConvention::FpSpillMask() const {
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    result |= (1 << kHFSCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
@@ -257,9 +289,10 @@
 
 size_t ArmJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = kArmPointerSize + (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = static_cast<size_t>(kArmPointerSize)
+      + (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -269,6 +302,10 @@
                  kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> ArmJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
@@ -309,7 +346,8 @@
 
 FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() {
   CHECK_GE(itr_slots_, 4u);
-  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
+  size_t offset =
+      displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
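
The rewritten ARM convention replaces the per-instance callee_save_regs_ vector with a static constexpr register array and derives the spill masks at compile time. A simplified sketch of that computation, using bare register numbers instead of ManagedRegister (the numbering below is illustrative):

#include <cstdint>

// Illustrative register numbering; not the ART enums.
enum Reg : uint32_t { R5 = 5, R6 = 6, R7 = 7, R8 = 8, R10 = 10, R11 = 11, LR = 14 };

static constexpr Reg kCalleeSaves[] = { R5, R6, R7, R8, R10, R11 };

static constexpr uint32_t CalculateCoreCalleeSpillMask() {
  // LR is a special callee save that is not part of the register list.
  uint32_t result = 1u << LR;
  for (Reg r : kCalleeSaves) {
    result |= 1u << r;
  }
  return result;
}

static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
static_assert(kCoreCalleeSpillMask == 0x4de0u, "r5-r8, r10, r11, lr");

Because the mask is a constexpr derived from the same array that CalleeSaveRegisters() returns, the two can no longer drift apart the way the old hand-written mask and vector could.
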
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 35b5093..7c717cc 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_
 #define ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace arm {
 
-constexpr size_t kFramePointerSize = 4;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32);
 
 class ArmManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   ArmManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32) {}
   ~ArmManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -58,9 +62,7 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -78,9 +80,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 9aef10e..afa707d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm64 {
 
+static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size");
+
 static const XRegister kXArgumentRegisters[] = {
   X0, X1, X2, X3, X4, X5, X6, X7
 };
@@ -38,10 +40,65 @@
   S0, S1, S2, S3, S4, S5, S6, S7
 };
 
-static const DRegister kDCalleeSaveRegisters[] = {
-  D8, D9, D10, D11, D12, D13, D14, D15
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    // Note: The native JNI function may call into VM runtime functions which may suspend
+    // or trigger GC, and the JNI method frame becomes the top quick frame in those cases.
+    // So, to satisfy the GC, we need to save LR and the callee-save registers, similar to
+    // the CalleeSaveMethod(RefsOnly) frame.
+    // The JNI function is the native function that the Java code wants to call.
+    // The JNI method is the method compiled by the JNI compiler.
+    // Call chain: managed code (Java) --> JNI method --> JNI function.
+    // Thread register (X19) is saved on the stack.
+    Arm64ManagedRegister::FromXRegister(X19),
+    Arm64ManagedRegister::FromXRegister(X20),
+    Arm64ManagedRegister::FromXRegister(X21),
+    Arm64ManagedRegister::FromXRegister(X22),
+    Arm64ManagedRegister::FromXRegister(X23),
+    Arm64ManagedRegister::FromXRegister(X24),
+    Arm64ManagedRegister::FromXRegister(X25),
+    Arm64ManagedRegister::FromXRegister(X26),
+    Arm64ManagedRegister::FromXRegister(X27),
+    Arm64ManagedRegister::FromXRegister(X28),
+    Arm64ManagedRegister::FromXRegister(X29),
+    Arm64ManagedRegister::FromXRegister(LR),
+    // Hard float registers.
+    // Considering the case java_method_1 --> jni method --> jni function --> java_method_2,
+    // we may break on java_method_2 and still need to find out the values of the DEX registers
+    // in java_method_1. So all callee-saves (in managed code) need to be saved.
+    Arm64ManagedRegister::FromDRegister(D8),
+    Arm64ManagedRegister::FromDRegister(D9),
+    Arm64ManagedRegister::FromDRegister(D10),
+    Arm64ManagedRegister::FromDRegister(D11),
+    Arm64ManagedRegister::FromDRegister(D12),
+    Arm64ManagedRegister::FromDRegister(D13),
+    Arm64ManagedRegister::FromDRegister(D14),
+    Arm64ManagedRegister::FromDRegister(D15),
 };
 
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  uint32_t result = 0u;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsXRegister()) {
+      result |= (1 << r.AsArm64().AsXRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsDRegister()) {
+      result |= (1 << r.AsArm64().AsDRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
@@ -156,48 +213,15 @@
 // JNI calling convention
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  uint32_t core_spill_mask = CoreSpillMask();
-  DCHECK_EQ(XZR, kNumberOfXRegisters - 1);  // Exclude XZR from the loop (avoid 1 << 32).
-  for (int x_reg = 0; x_reg < kNumberOfXRegisters - 1; ++x_reg) {
-    if (((1 << x_reg) & core_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromXRegister(static_cast<XRegister>(x_reg)));
-    }
-  }
-
-  uint32_t fp_spill_mask = FpSpillMask();
-  for (int d_reg = 0; d_reg < kNumberOfDRegisters; ++d_reg) {
-    if (((1 << d_reg) & fp_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromDRegister(static_cast<DRegister>(d_reg)));
-    }
-  }
+    : JniCallingConvention(is_static, is_synchronized, shorty, kArm64PointerSize) {
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor.
-  // Note: The native jni function may call to some VM runtime functions which may suspend
-  // or trigger GC. And the jni method frame will become top quick frame in those cases.
-  // So we need to satisfy GC to save LR and callee-save registers which is similar to
-  // CalleeSaveMethod(RefOnly) frame.
-  // Jni function is the native function which the java code wants to call.
-  // Jni method is the method that compiled by jni compiler.
-  // Call chain: managed code(java) --> jni method --> jni function.
-  // Thread register(X19) is saved on stack.
-  return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
-         1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t Arm64JniCallingConvention::FpSpillMask() const {
-  // Considering the case, java_method_1 --> jni method --> jni function --> java_method_2, we may
-  // break on java_method_2 and we still need to find out the values of DEX registers in
-  // java_method_1. So all callee-saves(in managed code) need to be saved.
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) {
-    result |= (1 << kDCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
@@ -209,7 +233,7 @@
   size_t frame_data_size = kFramePointerSize +
       CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kArm64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -218,6 +242,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return (itr_float_and_doubles_ < 8);
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 37c92b2..90b12e5 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
 #define ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace arm64 {
 
-constexpr size_t kFramePointerSize = 8;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
 
 class Arm64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   Arm64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~Arm64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -57,9 +61,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -77,9 +79,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
 };
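
CalleeSaveRegisters() now returns an ArrayRef over the shared constexpr array instead of copying registers into a per-instance std::vector. A rough sketch of the non-owning view this relies on (a pared-down stand-in for utils/array_ref.h, with int standing in for ManagedRegister):

#include <cstddef>
#include <cstdio>

// Minimal non-owning array view, roughly the shape of art::ArrayRef.
template <typename T>
class ArrayRef {
 public:
  template <size_t kSize>
  explicit constexpr ArrayRef(T (&array)[kSize]) : data_(array), size_(kSize) {}

  constexpr const T* begin() const { return data_; }
  constexpr const T* end() const { return data_ + size_; }
  constexpr size_t size() const { return size_; }

 private:
  T* data_;
  size_t size_;
};

// Illustrative register numbers; the real array holds ManagedRegister values.
static constexpr int kCalleeSaveRegisters[] = { 19, 20, 21, 22 };

// The accessor hands out a view of the shared static data; nothing is copied per instance.
ArrayRef<const int> CalleeSaveRegisters() {
  return ArrayRef<const int>(kCalleeSaveRegisters);
}

int main() {
  for (int reg : CalleeSaveRegisters()) {
    std::printf("callee save: x%d\n", reg);
  }
  return 0;
}
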
 
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index e21f554..c7ed9c9 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -299,7 +299,7 @@
 
 size_t JniCallingConvention::CurrentParamSize() {
   if (itr_args_ <= kObjectOrClass) {
-    return frame_pointer_size_;  // JNIEnv or jobject/jclass
+    return static_cast<size_t>(frame_pointer_size_);  // JNIEnv or jobject/jclass
   } else {
     int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
     return ParamSize(arg_pos);
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 2c4b15c..995fa51 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -17,12 +17,12 @@
 #ifndef ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
-#include <vector>
-
 #include "base/arena_object.h"
+#include "base/enums.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
+#include "utils/array_ref.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -71,8 +71,10 @@
   virtual ~CallingConvention() {}
 
  protected:
-  CallingConvention(bool is_static, bool is_synchronized, const char* shorty,
-                    size_t frame_pointer_size)
+  CallingConvention(bool is_static,
+                    bool is_synchronized,
+                    const char* shorty,
+                    PointerSize frame_pointer_size)
       : itr_slots_(0), itr_refs_(0), itr_args_(0), itr_longs_and_doubles_(0),
         itr_float_and_doubles_(0), displacement_(0),
         frame_pointer_size_(frame_pointer_size),
@@ -199,7 +201,7 @@
   // Space for frames below this on the stack.
   FrameOffset displacement_;
   // The size of a pointer.
-  const size_t frame_pointer_size_;
+  const PointerSize frame_pointer_size_;
   // The size of a reference entry within the handle scope.
   const size_t handle_scope_pointer_size_;
 
@@ -256,7 +258,7 @@
   ManagedRuntimeCallingConvention(bool is_static,
                                   bool is_synchronized,
                                   const char* shorty,
-                                  size_t frame_pointer_size)
+                                  PointerSize frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 };
 
@@ -301,7 +303,7 @@
   virtual bool RequiresSmallResultTypeExtension() const = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
-  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
+  virtual ArrayRef<const ManagedRegister> CalleeSaveRegisters() const = 0;
 
   // Spill mask values
   virtual uint32_t CoreSpillMask() const = 0;
@@ -329,7 +331,7 @@
 
   // Position of handle scope and interior fields
   FrameOffset HandleScopeOffset() const {
-    return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_);
+    return FrameOffset(this->displacement_.Int32Value() + static_cast<size_t>(frame_pointer_size_));
     // above Method reference
   }
 
@@ -357,8 +359,10 @@
     kObjectOrClass = 1
   };
 
-  JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty,
-                       size_t frame_pointer_size)
+  JniCallingConvention(bool is_static,
+                       bool is_synchronized,
+                       const char* shorty,
+                       PointerSize frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 
   // Number of stack slots for outgoing arguments, above which the handle scope is
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 27714b8..d092c3f 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -17,14 +17,17 @@
 #include "jni_compiler.h"
 
 #include <algorithm>
+#include <ios>
 #include <memory>
 #include <vector>
 #include <fstream>
 
 #include "art_method.h"
 #include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "memory_region.h"
 #include "calling_convention.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -33,35 +36,51 @@
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_env_ext.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/mips/managed_register_mips.h"
 #include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
+#include "utils.h"
 #include "thread.h"
 
 #define __ jni_asm->
 
 namespace art {
 
-static void CopyParameter(Assembler* jni_asm,
+using JniOptimizationFlags = Compiler::JniOptimizationFlags;
+
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size);
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg);
 
+template <PointerSize kPointerSize>
+static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler(
+    ArenaAllocator* arena, InstructionSet isa, const InstructionSetFeatures* features) {
+  return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
+}
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
-                                            uint32_t access_flags, uint32_t method_idx,
-                                            const DexFile& dex_file) {
+template <PointerSize kPointerSize>
+static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
+                                                   uint32_t access_flags,
+                                                   uint32_t method_idx,
+                                                   const DexFile& dex_file,
+                                                   JniOptimizationFlags optimization_flags) {
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -69,7 +88,19 @@
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
-  const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
+
+  // i.e. if the method was annotated with @FastNative
+  const bool is_fast_native =
+      (static_cast<uint32_t>(optimization_flags) & Compiler::kFastNative) != 0;
+
+  VLOG(jni) << "JniCompile: Method :: "
+              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true)
+              << " :: access_flags = " << std::hex << access_flags << std::dec;
+
+  if (UNLIKELY(is_fast_native)) {
+    VLOG(jni) << "JniCompile: Fast native method detected :: "
+              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
+  }
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -100,8 +131,8 @@
       &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(
-      Assembler::Create(&arena, instruction_set, instruction_set_features));
+  std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
+      GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
@@ -112,7 +143,7 @@
 
   // 1. Build the frame saving all callee saves
   const size_t frame_size(main_jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
+  ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
@@ -123,21 +154,12 @@
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
-  if (is_64_bit_target) {
-    __ CopyRawPtrFromThread64(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<8>(),
+  __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
+                          Thread::TopHandleScopeOffset<kPointerSize>(),
+                          mr_conv->InterproceduralScratchRegister());
+  __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
+                              main_jni_conv->HandleScopeOffset(),
                               mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread64(Thread::TopHandleScopeOffset<8>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  } else {
-    __ CopyRawPtrFromThread32(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<4>(),
-                              mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread32(Thread::TopHandleScopeOffset<4>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  }
 
   // 3. Place incoming reference arguments into handle scope
   main_jni_conv->Next();  // Skip JNIEnv*
@@ -187,11 +209,7 @@
   }
 
   // 4. Write out the end of the quick frames.
-  if (is_64_bit_target) {
-    __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>());
-  } else {
-    __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>());
-  }
+  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -201,8 +219,8 @@
   // Call the read barrier for the declaring class loaded from the method for a static call.
   // Note that we always have outgoing param space available for at least two params.
   if (kUseReadBarrier && is_static) {
-    ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni);
-    ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni);
+    ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
+                                                                      pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     main_jni_conv->Next();  // Skip JNIEnv.
     FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
@@ -222,21 +240,13 @@
     // Pass the current thread as the second argument and call.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      if (is_64_bit_target) {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
-                main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
-                main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ Call(main_jni_conv->CurrentParamRegister(),
+              Offset(read_barrier),
+              main_jni_conv->InterproceduralScratchRegister());
     } else {
       __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                           main_jni_conv->InterproceduralScratchRegister());
-      if (is_64_bit_target) {
-        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
   }
@@ -245,10 +255,13 @@
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset<4> jni_start32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStartSynchronized)
-                                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStart);
-  ThreadOffset<8> jni_start64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStartSynchronized)
-                                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStart);
+  ThreadOffset<kPointerSize> jni_start =
+      is_synchronized
+          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+          : (is_fast_native
+                 ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                 : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
   FrameOffset locked_object_handle_scope_offset(0);
   if (is_synchronized) {
@@ -269,21 +282,13 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
-              main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start32),
-              main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(main_jni_conv->CurrentParamRegister(),
+            Offset(jni_start),
+            main_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
   }
   if (is_synchronized) {  // Check for exceptions from monitor enter.
     __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
@@ -345,20 +350,12 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    if (is_64_bit_target) {
-      __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>());
-    } else {
-      __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>());
-    }
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    if (is_64_bit_target) {
-      __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CopyRawPtrFromThread(jni_env,
+                            Thread::JniEnvOffset<kPointerSize>(),
+                            main_jni_conv->InterproceduralScratchRegister());
   }
 
   // 9. Plant call to native code associated with method.
@@ -387,10 +384,13 @@
         main_jni_conv->GetReturnType() == Primitive::kPrimDouble &&
         return_save_location.Uint32Value() % 8 != 0) {
       // Ensure doubles are 8-byte aligned for MIPS
-      return_save_location = FrameOffset(return_save_location.Uint32Value() + kMipsPointerSize);
+      return_save_location = FrameOffset(return_save_location.Uint32Value()
+                                             + static_cast<size_t>(kMipsPointerSize));
     }
     CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
-    __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
+    __ Store(return_save_location,
+             main_jni_conv->ReturnRegister(),
+             main_jni_conv->SizeOfReturnValue());
   }
 
   // Increase frame size for out args if needed by the end_jni_conv.
@@ -406,21 +406,21 @@
   }
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-  ThreadOffset<4> jni_end32(-1);
-  ThreadOffset<8> jni_end64(-1);
+
+  ThreadOffset<kPointerSize> jni_end(-1);
   if (reference_return) {
     // Pass result.
-    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReferenceSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReference);
-    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReferenceSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReference);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
     SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
     end_jni_conv->Next();
   } else {
-    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEnd);
-    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndSynchronized)
-                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEnd);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                  : (is_fast_native
+                         ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                         : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
   }
   // Pass saved local reference state.
   if (end_jni_conv->IsCurrentParamOnStack()) {
@@ -447,21 +447,13 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
-              end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end32),
-              end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(end_jni_conv->CurrentParamRegister(),
+            Offset(jni_end),
+            end_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(ThreadOffset<8>(jni_end64), end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(ThreadOffset<4>(jni_end32), end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
   }
 
   // 13. Reload return value
@@ -501,7 +493,8 @@
 }
 
 // Copy a single parameter from the managed to the JNI calling convention.
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size) {
@@ -590,7 +583,8 @@
   }
 }
 
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg) {
   if (jni_conv->IsCurrentParamOnStack()) {
@@ -603,9 +597,18 @@
   }
 }
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
-                                         uint32_t method_idx, const DexFile& dex_file) {
-  return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
+CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                         uint32_t access_flags,
+                                         uint32_t method_idx,
+                                         const DexFile& dex_file,
+                                         Compiler::JniOptimizationFlags optimization_flags) {
+  if (Is64BitInstructionSet(compiler->GetInstructionSet())) {
+    return ArtJniCompileMethodInternal<PointerSize::k64>(
+        compiler, access_flags, method_idx, dex_file, optimization_flags);
+  } else {
+    return ArtJniCompileMethodInternal<PointerSize::k32>(
+        compiler, access_flags, method_idx, dex_file, optimization_flags);
+  }
 }
 
 }  // namespace art
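
The entrypoint selection above collapses the old 32/64-bit duplication into a single ThreadOffset<kPointerSize> and routes @FastNative methods to the pJniMethodFastStart/pJniMethodFastEnd entrypoints. A condensed sketch of the selection logic, with a placeholder enum standing in for QUICK_ENTRYPOINT_OFFSET:

#include <cstdint>

// Placeholder entrypoint identifiers; the real code uses
// QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethod...).
enum class JniStart : uint32_t {
  kStart,
  kFastStart,
  kStartSynchronized,
};

// Synchronized natives always take the locking entrypoint; @FastNative only applies to the
// unsynchronized case, mirroring the ternary used for jni_start (and jni_end) in the compiler.
constexpr JniStart SelectJniStart(bool is_synchronized, bool is_fast_native) {
  return is_synchronized
             ? JniStart::kStartSynchronized
             : (is_fast_native ? JniStart::kFastStart : JniStart::kStart);
}

static_assert(SelectJniStart(true, true) == JniStart::kStartSynchronized, "sync wins");
static_assert(SelectJniStart(false, true) == JniStart::kFastStart, "fast native");
static_assert(SelectJniStart(false, false) == JniStart::kStart, "default");
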
diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h
index 46277f1..26c32a3 100644
--- a/compiler/jni/quick/jni_compiler.h
+++ b/compiler/jni/quick/jni_compiler.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 #define ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 
+#include "compiler.h"
 #include "dex_file.h"
 
 namespace art {
@@ -24,8 +25,11 @@
 class CompilerDriver;
 class CompiledMethod;
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
-                                         uint32_t method_idx, const DexFile& dex_file);
+CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                         uint32_t access_flags,
+                                         uint32_t method_idx,
+                                         const DexFile& dex_file,
+                                         Compiler::JniOptimizationFlags optimization_flags);
 
 }  // namespace art
 
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 2d31a98..f5ab5f7 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -27,6 +27,32 @@
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    MipsManagedRegister::FromCoreRegister(S2),
+    MipsManagedRegister::FromCoreRegister(S3),
+    MipsManagedRegister::FromCoreRegister(S4),
+    MipsManagedRegister::FromCoreRegister(S5),
+    MipsManagedRegister::FromCoreRegister(S6),
+    MipsManagedRegister::FromCoreRegister(S7),
+    MipsManagedRegister::FromCoreRegister(FP),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips().IsCoreRegister()) {
+      result |= (1 << r.AsMips().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return MipsManagedRegister::FromCoreRegister(T9);
@@ -146,7 +172,7 @@
 
 MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, bool is_synchronized,
                                                    const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
+    : JniCallingConvention(is_static, is_synchronized, shorty, kMipsPointerSize) {
   // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
   // or jclass for static methods and the JNIEnv. We start at the aligned register A2.
   size_t padding = 0;
@@ -161,21 +187,14 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t MipsJniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const {
@@ -184,10 +203,10 @@
 
 size_t MipsJniCallingConvention::FrameSize() {
   // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = kMipsPointerSize +
+  size_t frame_data_size = static_cast<size_t>(kMipsPointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -196,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void MipsJniCallingConvention::Next() {
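The callee-save list moves from a per-instance std::vector built in the constructor to a file-static constexpr table, and the core spill mask is now folded out of that same table by a constexpr function, so the list and the mask can no longer drift apart and nothing is computed at run time. A minimal standalone sketch of the pattern, with a made-up FakeReg enum and register numbers standing in for ManagedRegister values:

    #include <cstdint>

    // Illustrative register numbers only; the real table holds MipsManagedRegister values.
    enum FakeReg : int { kS2 = 18, kS3 = 19, kS4 = 20, kFP = 30, kRA = 31 };

    static constexpr FakeReg kCalleeSaves[] = { kS2, kS3, kS4, kFP };

    static constexpr uint32_t CalculateSpillMask() {
      // RA is spilled for the return address even though it is not in the list.
      uint32_t mask = 1u << kRA;
      for (FakeReg r : kCalleeSaves) {
        mask |= 1u << r;
      }
      return mask;
    }

    static constexpr uint32_t kSpillMask = CalculateSpillMask();
    static_assert(kSpillMask == ((1u << kS2) | (1u << kS3) | (1u << kS4) | (1u << kFP) | (1u << kRA)),
                  "mask derived from the table must match the hand-written expression");

    int main() { return kSpillMask != 0u ? 0 : 1; }
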
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index dc45432..e95a738 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -17,17 +17,23 @@
 #ifndef ART_COMPILER_JNI_QUICK_MIPS_CALLING_CONVENTION_MIPS_H_
 #define ART_COMPILER_JNI_QUICK_MIPS_CALLING_CONVENTION_MIPS_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace mips {
 
 constexpr size_t kFramePointerSize = 4;
+static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k32),
+              "Invalid frame pointer size");
 
 class MipsManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   MipsManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32) {}
   ~MipsManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -58,14 +64,10 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -80,9 +82,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
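On the header side, CalleeSaveRegisters() now hands out an ArrayRef<const ManagedRegister> view of the shared static table instead of a reference to per-object storage, which is exactly what the removed TODO ("these values aren't unique and can be shared amongst instances") asked for. A rough sketch of the idea with a minimal non-owning view type standing in for ART's ArrayRef; SpanView and the register ids are illustrative:

    #include <cstddef>
    #include <iostream>

    // Minimal stand-in for ART's ArrayRef: a non-owning pointer + length view.
    template <typename T>
    class SpanView {
     public:
      template <size_t N>
      explicit constexpr SpanView(T (&array)[N]) : data_(array), size_(N) {}
      constexpr size_t size() const { return size_; }
      constexpr const T* begin() const { return data_; }
      constexpr const T* end() const { return data_ + size_; }
     private:
      const T* data_;
      size_t size_;
    };

    static constexpr int kCalleeSaves[] = { 18, 19, 20, 30 };  // Illustrative register ids.

    // The accessor no longer copies anything or touches instance state.
    SpanView<const int> CalleeSaveRegisters() {
      return SpanView<const int>(kCalleeSaves);
    }

    int main() {
      for (int reg : CalleeSaveRegisters()) {
        std::cout << reg << "\n";
      }
      return 0;
    }
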
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 807d740..8341e8e 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -31,6 +31,33 @@
   F12, F13, F14, F15, F16, F17, F18, F19
 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    Mips64ManagedRegister::FromGpuRegister(S2),
+    Mips64ManagedRegister::FromGpuRegister(S3),
+    Mips64ManagedRegister::FromGpuRegister(S4),
+    Mips64ManagedRegister::FromGpuRegister(S5),
+    Mips64ManagedRegister::FromGpuRegister(S6),
+    Mips64ManagedRegister::FromGpuRegister(S7),
+    Mips64ManagedRegister::FromGpuRegister(GP),
+    Mips64ManagedRegister::FromGpuRegister(S8),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips64().IsGpuRegister()) {
+      result |= (1 << r.AsMips64().AsGpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Mips64ManagedRegister::FromGpuRegister(T9);
@@ -125,23 +152,15 @@
 
 Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kMips64PointerSize) {
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
-  DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1);
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t Mips64JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Mips64JniCallingConvention::ReturnScratchRegister() const {
@@ -153,7 +172,7 @@
   size_t frame_data_size = kFramePointerSize +
       (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kMips64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -162,6 +181,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Mips64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
   return itr_args_ < 8;
 }
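The size arguments in these constructors and in HandleScope::SizeOf() switch from raw size_t constants to the PointerSize enum class from base/enums.h, which is why plain arithmetic now needs an explicit static_cast<size_t>. A sketch of that trade-off, under the assumption (implied by the casts in this diff) that PointerSize is a size_t-backed enum class with k32/k64 members; the layout computed below is illustrative, not ART's exact HandleScope::SizeOf():

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Assumed shape of art::PointerSize, inferred from the casts in this patch.
    enum class PointerSize : size_t { k32 = 4u, k64 = 8u };

    // Callers must name the pointer size explicitly; a bare "8" no longer compiles here.
    size_t HandleScopeHeaderSize(PointerSize ptr_size, uint32_t num_refs) {
      // link_ pointer + number_of_references_ (uint32_t) + one 32-bit slot per reference.
      return static_cast<size_t>(ptr_size) + sizeof(uint32_t) + num_refs * sizeof(uint32_t);
    }

    int main() {
      std::cout << HandleScopeHeaderSize(PointerSize::k64, 3) << "\n";  // 8 + 4 + 12 = 24.
      // HandleScopeHeaderSize(8, 3);  // Would not compile: no implicit int -> PointerSize.
      return 0;
    }
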
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index 3d6aab7..a5fd111 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -17,17 +17,23 @@
 #ifndef ART_COMPILER_JNI_QUICK_MIPS64_CALLING_CONVENTION_MIPS64_H_
 #define ART_COMPILER_JNI_QUICK_MIPS64_CALLING_CONVENTION_MIPS64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace mips64 {
 
 constexpr size_t kFramePointerSize = 8;
+static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k64),
+              "Invalid frame pointer size");
 
 class Mips64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   Mips64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~Mips64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -57,14 +63,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -79,9 +81,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 322caca..1d06f26 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -23,6 +23,30 @@
 namespace art {
 namespace x86 {
 
+static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size");
+
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86ManagedRegister::FromCpuRegister(EBP),
+    X86ManagedRegister::FromCpuRegister(ESI),
+    X86ManagedRegister::FromCpuRegister(EDI),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86().IsCpuRegister()) {
+      result |= (1 << r.AsX86().AsCpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -168,22 +192,23 @@
 
 X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kX86PointerSize) {
 }
 
 uint32_t X86JniCallingConvention::CoreSpillMask() const {
-  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t X86JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 size_t X86JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = kX86PointerSize +
+  size_t frame_data_size = static_cast<size_t>(kX86PointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -192,6 +217,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack.
 }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cdf0956..ff92fc9 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -17,17 +17,21 @@
 #ifndef ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_
 #define ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace x86 {
 
-constexpr size_t kFramePointerSize = 4;
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32);
 
 class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize),
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k32),
         gpr_arg_count_(0) {}
   ~X86ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
@@ -59,14 +63,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -81,9 +81,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index b6b11ca..cbf10bd 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -24,6 +24,49 @@
 namespace art {
 namespace x86_64 {
 
+constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
+
+static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size");
+
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86_64ManagedRegister::FromCpuRegister(RBX),
+    X86_64ManagedRegister::FromCpuRegister(RBP),
+    X86_64ManagedRegister::FromCpuRegister(R12),
+    X86_64ManagedRegister::FromCpuRegister(R13),
+    X86_64ManagedRegister::FromCpuRegister(R14),
+    X86_64ManagedRegister::FromCpuRegister(R15),
+    // Hard float registers.
+    X86_64ManagedRegister::FromXmmRegister(XMM12),
+    X86_64ManagedRegister::FromXmmRegister(XMM13),
+    X86_64ManagedRegister::FromXmmRegister(XMM14),
+    X86_64ManagedRegister::FromXmmRegister(XMM15),
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsCpuRegister()) {
+      result |= (1 << r.AsX86_64().AsCpuRegister().AsRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsXmmRegister()) {
+      result |= (1 << r.AsX86_64().AsXmmRegister().AsFloatRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -97,7 +140,7 @@
 
 FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
   return FrameOffset(displacement_.Int32Value() +  // displacement
-                     kX86_64PointerSize +  // Method ref
+                     static_cast<size_t>(kX86_64PointerSize) +  // Method ref
                      itr_slots_ * sizeof(uint32_t));  // offset into in args
 }
 
@@ -124,34 +167,23 @@
 
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBX));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBP));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
+    : JniCallingConvention(is_static, is_synchronized, shorty, kX86_64PointerSize) {
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
-  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 |
-      1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t X86_64JniCallingConvention::FpSpillMask() const {
-  return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+  return kFpCalleeSpillMask;
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = kX86_64PointerSize +
+  size_t frame_data_size = static_cast<size_t>(kX86_64PointerSize) +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
@@ -160,6 +192,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86_64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
   return !IsCurrentParamOnStack();
 }
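x86-64 is the only architecture in this change with hard-float callee saves, so the single register table feeds two constexpr masks: one pass keeps CPU registers (plus the return-address marker bit at kNumberOfCpuRegisters), the other keeps XMM registers. A standalone sketch of deriving both masks from one tagged table; the Kind/Reg types and the register ids are illustrative:

    #include <cstdint>

    enum class Kind { kCpu, kXmm };

    struct Reg {
      Kind kind;
      int id;
    };

    // Illustrative stand-in for kCalleeSaveRegisters: RBX, RBP, R12-R15, XMM12-XMM15.
    static constexpr Reg kCalleeSaves[] = {
        {Kind::kCpu, 3}, {Kind::kCpu, 5}, {Kind::kCpu, 12}, {Kind::kCpu, 13},
        {Kind::kCpu, 14}, {Kind::kCpu, 15},
        {Kind::kXmm, 12}, {Kind::kXmm, 13}, {Kind::kXmm, 14}, {Kind::kXmm, 15},
    };

    static constexpr uint32_t CalculateMask(Kind kind, uint32_t extra_bits) {
      uint32_t mask = extra_bits;
      for (const Reg& r : kCalleeSaves) {
        if (r.kind == kind) {
          mask |= 1u << r.id;
        }
      }
      return mask;
    }

    // The spilled return address gets the marker bit just past the last CPU register (bit 16 here).
    static constexpr uint32_t kCoreMask = CalculateMask(Kind::kCpu, 1u << 16);
    static constexpr uint32_t kFpMask = CalculateMask(Kind::kXmm, 0u);

    static_assert(kCoreMask == ((1u << 3) | (1u << 5) | (1u << 12) | (1u << 13) |
                                (1u << 14) | (1u << 15) | (1u << 16)),
                  "core mask mismatch");
    static_assert(kFpMask == ((1u << 12) | (1u << 13) | (1u << 14) | (1u << 15)),
                  "fp mask mismatch");

    int main() { return 0; }
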
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 6e47c9f..b98f505 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -17,17 +17,19 @@
 #ifndef ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
 #define ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
 
+#include "base/enums.h"
 #include "jni/quick/calling_convention.h"
 
 namespace art {
 namespace x86_64 {
 
-constexpr size_t kFramePointerSize = 8;
-
 class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
  public:
   X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {}
+      : ManagedRuntimeCallingConvention(is_static,
+                                        is_synchronized,
+                                        shorty,
+                                        PointerSize::k64) {}
   ~X86_64ManagedRuntimeCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
@@ -55,9 +57,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -75,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);
 };
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index d4dd978..2471f79 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -31,10 +31,6 @@
 }
 
 uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
-  // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
-  // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
-  // of code. To avoid any alignment discrepancies for the final chunk, we always align the
-  // offset after reserving of writing any chunk.
   uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
   bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
                                                 MethodReference(nullptr, 0u),
@@ -46,7 +42,7 @@
     unprocessed_patches_.clear();
 
     thunk_locations_.push_back(aligned_offset);
-    offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -65,13 +61,7 @@
     if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
       return 0u;
     }
-    uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
-    // Align after writing chunk, see the ReserveSpace() above.
-    offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
-    aligned_code_delta = offset - thunk_end_offset;
-    if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
-      return 0u;
-    }
+    offset = aligned_offset + thunk_code_.size();
   }
   return offset;
 }
@@ -92,7 +82,7 @@
                                                       MethodReference method_ref,
                                                       uint32_t max_extra_space) {
   uint32_t quick_code_size = compiled_method->GetQuickCode().size();
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
   uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
   // Adjust for extra space required by the subclass.
   next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
@@ -106,9 +96,9 @@
     if (needs_thunk) {
       // A single thunk will cover all pending patches.
       unprocessed_patches_.clear();
-      uint32_t thunk_location = compiled_method->AlignCode(offset);
+      uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_);
       thunk_locations_.push_back(thunk_location);
-      offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
+      offset = thunk_location + thunk_code_.size();
     }
   }
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index a8078e3..eace3d4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -48,18 +48,18 @@
                              const ArrayRef<const LinkerPatch>& method3_patches,
                              uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
 
     // We want to put the method3 at a very precise offset.
     const uint32_t method3_offset = method1_offset + distance_without_thunks;
-    CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment);
+    CHECK_ALIGNED(method3_offset, kArmAlignment);
 
     // Calculate size of method2 so that we put method3 at the correct place.
+    const uint32_t method1_end = method1_offset + method1_code.size();
     const uint32_t method2_offset =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) +
-        sizeof(OatQuickMethodHeader);
+        method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader);
     const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
     std::vector<uint8_t> method2_raw_code(method2_size);
     ArrayRef<const uint8_t> method2_code(method2_raw_code);
@@ -78,8 +78,11 @@
     if (result3.second == method3_offset + 1 /* thumb mode */) {
       return false;  // No thunk.
     } else {
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2);
-      CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */);
+      uint32_t thunk_end =
+          CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) +
+          ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */);
       return true;   // Thunk present.
     }
   }
@@ -352,9 +355,12 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t method3_offset = GetMethodOffset(3u);
+  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset));
   uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset));
-  uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2);
+  uint32_t thunk_offset =
+      RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            method3_header_offset);
   ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
   ASSERT_EQ(diff & 1u, 0u);
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index b4ecbd8..4c8788e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -83,7 +83,7 @@
 
   // Now that we have the actual offset where the code will be placed, locate the ADRP insns
   // that actually require the thunk.
-  uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+  uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
   ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
   DCHECK(compiled_method != nullptr);
@@ -210,7 +210,16 @@
   } else {
     if ((insn & 0xfffffc00) == 0x91000000) {
       // ADD immediate, 64-bit with imm12 == 0 (unset).
-      DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
+      if (!kEmitCompilerReadBarrier) {
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
+      } else {
+        // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
+        // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the
+        // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative ||
+               patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
+      }
       shift = 0u;  // No shift for ADD.
     } else {
       // LDR 32-bit or 64-bit with imm12 == 0 (unset).
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 09729fd..573de73 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -67,36 +67,39 @@
                                  const ArrayRef<const LinkerPatch>& last_method_patches,
                                  uint32_t distance_without_thunks) {
     CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
-    const uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+    uint32_t method1_offset =
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
-    const uint32_t gap_start =
-        CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
+    const uint32_t gap_start = method1_offset + method1_code.size();
 
     // We want to put the method3 at a very precise offset.
     const uint32_t last_method_offset = method1_offset + distance_without_thunks;
+    CHECK_ALIGNED(last_method_offset, kArm64Alignment);
     const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
-    CHECK_ALIGNED(gap_end, kArm64Alignment);
 
-    // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
+    // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB).
     // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB
-    // offsets by this test.)
+    // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate
+    // methods the same alignment of the end, so the thunk insertion adds a predictable size as
+    // long as it's after the first chunk.)
     uint32_t method_idx = 2u;
     constexpr uint32_t kSmallChunkSize = 2 * MB;
     std::vector<uint8_t> gap_code;
-    size_t gap_size = gap_end - gap_start;
-    for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) {
-      uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
+    uint32_t gap_size = gap_end - gap_start;
+    uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u;
+    uint32_t chunk_start = gap_start;
+    uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize;
+    for (uint32_t i = 0; i <= num_small_chunks; ++i) {  // num_small_chunks+1 iterations.
+      uint32_t chunk_code_size =
+          chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader);
       gap_code.resize(chunk_code_size, 0u);
       AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
                         ArrayRef<const LinkerPatch>());
       method_idx += 1u;
+      chunk_start += chunk_size;
+      chunk_size = kSmallChunkSize;  // For all but the first chunk.
+      DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start));
     }
-    uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
-    gap_code.resize(chunk_code_size, 0u);
-    AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
-                      ArrayRef<const LinkerPatch>());
-    method_idx += 1u;
 
     // Add the last method and link
     AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches);
@@ -109,8 +112,9 @@
     // There may be a thunk before method2.
     if (last_result.second != last_method_offset) {
       // Thunk present. Check that there's only one.
-      uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64);
-      CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size);
+      uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize();
+      uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
+      CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader));
     }
     return method_idx;
   }
@@ -341,7 +345,7 @@
                         uint32_t dex_cache_arrays_begin,
                         uint32_t element_offset) {
     uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -391,7 +395,7 @@
                         bool has_thunk,
                         uint32_t string_offset) {
     uint32_t method1_offset =
-        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+        kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
@@ -614,10 +618,12 @@
 
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset));
   uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset));
-  uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
-  ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
+  uint32_t thunk_offset =
+      RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64));
+  DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+            last_method_header_offset);
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
   CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
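Because methods themselves are now aligned rather than their headers, the test recovers the expected thunk location by rounding header_offset - ThunkSize() down to the instruction-set alignment and then checks that the leftover padding re-aligns the following method. A small worked version of that arithmetic; the header size 20 matches sizeof(OatQuickMethodHeader) in this patch, the alignment 16 stands in for the ARM64 value, and the thunk size is made up:

    #include <cassert>
    #include <cstdint>

    // n must be a power of two for these helpers.
    constexpr uint32_t RoundDown(uint32_t x, uint32_t n) { return x & ~(n - 1u); }
    constexpr uint32_t RoundUp(uint32_t x, uint32_t n) { return RoundDown(x + n - 1u, n); }

    int main() {
      constexpr uint32_t kAlignment = 16u;   // Stands in for GetInstructionSetAlignment(kArm64).
      constexpr uint32_t kThunkSize = 8u;    // Made-up thunk size.
      constexpr uint32_t kHeaderSize = 20u;  // sizeof(OatQuickMethodHeader).
      constexpr uint32_t header_offset = 0x1000u - kHeaderSize;  // Method code itself sits at 0x1000.

      // The thunk lands at an aligned offset just below the re-aligned method header.
      uint32_t thunk_offset = RoundDown(header_offset - kThunkSize, kAlignment);
      uint32_t thunk_end = thunk_offset + kThunkSize;
      // Padding that re-aligns the code following the next header (the CodeAlignmentSize idea).
      uint32_t padding = RoundUp(thunk_end + kHeaderSize, kAlignment) - (thunk_end + kHeaderSize);

      assert(thunk_offset % kAlignment == 0u);       // Same ASSERT as in the test.
      assert(thunk_end + padding == header_offset);  // Same invariant the test DCHECKs.
      return 0;
    }
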
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
new file mode 100644
index 0000000..c09950c
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/mips/relative_patcher_mips.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+uint32_t MipsRelativePatcher::ReserveSpace(
+    uint32_t offset,
+    const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+    MethodReference method_ref ATTRIBUTE_UNUSED) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) {
+  return offset;  // No thunks added; no limit on relative call distance.
+}
+
+void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                    uint32_t literal_offset ATTRIBUTE_UNUSED,
+                                    uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                    uint32_t target_offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS";
+}
+
+void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                   const LinkerPatch& patch,
+                                                   uint32_t patch_offset,
+                                                   uint32_t target_offset) {
+  uint32_t anchor_literal_offset = patch.PcInsnOffset();
+  uint32_t literal_offset = patch.LiteralOffset();
+  bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray);
+
+  // Basic sanity checks.
+  if (is_r6) {
+    DCHECK_GE(code->size(), 8u);
+    DCHECK_LE(literal_offset, code->size() - 8u);
+    DCHECK_EQ(literal_offset, anchor_literal_offset);
+    // AUIPC reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E);
+    DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC);
+    // ADDIU reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24);
+  } else {
+    DCHECK_GE(code->size(), 16u);
+    DCHECK_LE(literal_offset, code->size() - 12u);
+    DCHECK_GE(literal_offset, 4u);
+    // The NAL instruction may not precede immediately as the PC+0 value may
+    // come from HMipsComputeBaseMethodAddress.
+    if (dex_cache_array) {
+      DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+      // NAL
+      DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+      DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    }
+    // LUI reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
+    DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
+    // ORI reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
+    // ADDU reg, reg, reg2
+    DCHECK_EQ((*code)[literal_offset + 8], 0x21);
+    DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
+    if (dex_cache_array) {
+      // reg2 is either RA or from HMipsComputeBaseMethodAddress.
+      DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    }
+    DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
+  }
+
+  // Apply patch.
+  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+  uint32_t diff = target_offset - anchor_offset;
+  if (dex_cache_array) {
+    diff += kDexCacheArrayLwOffset;
+  }
+  if (is_r6) {
+    diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
+  }
+
+  // LUI reg, offset_high / AUIPC reg, offset_high
+  (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16);
+  (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24);
+  // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low
+  (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0);
+  (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
+}
+
+}  // namespace linker
+}  // namespace art
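The new MIPS patcher writes a 32-bit PC-relative difference as a high halfword (lui/auipc) plus a low halfword (ori/addiu). Because R6's addiu sign-extends its immediate, the high half is pre-biased by diff += (diff & 0x8000) << 1 so that adding the sign-extended low half reproduces the original value. A tiny standalone check of that identity; the helpers below are an illustration, not the patcher itself:

    #include <cassert>
    #include <cstdint>

    // Split a 32-bit diff the way the R6 path does: the low 16 bits will be added
    // with sign extension (addiu), so bias the high half first.
    static void SplitForAddiu(uint32_t diff, uint16_t* hi, uint16_t* lo) {
      uint32_t biased = diff + ((diff & 0x8000u) << 1);
      *hi = static_cast<uint16_t>(biased >> 16);
      *lo = static_cast<uint16_t>(diff & 0xFFFFu);
    }

    // Recombine the way the hardware does: (hi << 16) + sign_extend(lo).
    static uint32_t Recombine(uint16_t hi, uint16_t lo) {
      int32_t sext_lo = static_cast<int16_t>(lo);
      return (static_cast<uint32_t>(hi) << 16) + static_cast<uint32_t>(sext_lo);
    }

    int main() {
      const uint32_t samples[] = { 0x00000000u, 0x00007FFFu, 0x00008000u,
                                   0x12345678u, 0x8765ABCDu, 0xFFFFFFFFu };
      for (uint32_t diff : samples) {
        uint16_t hi, lo;
        SplitForAddiu(diff, &hi, &lo);
        assert(Recombine(hi, lo) == diff);  // Sign extension in the low add is compensated.
      }
      return 0;
    }
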
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
new file mode 100644
index 0000000..4ff2f2f
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+
+#include "linker/relative_patcher.h"
+#include "arch/mips/instruction_set_features_mips.h"
+
+namespace art {
+namespace linker {
+
+class MipsRelativePatcher FINAL : public RelativePatcher {
+ public:
+  explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features)
+      : is_r6(features->IsR6()) {}
+
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
+                        MethodReference method_ref) OVERRIDE;
+  uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
+
+ private:
+  // We'll maximize the range of a single load instruction for dex cache array accesses
+  // by aligning offset -32768 with the offset of the first used element.
+  static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+  bool is_r6;
+
+  DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher);
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
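The 0x8000 bias in kDexCacheArrayLwOffset exists because MIPS lw takes a signed 16-bit displacement (-32768..32767); by pointing the patched base register 0x8000 past the start of the dex cache arrays, the full 64 KiB window starting at the arrays becomes reachable from one register. A quick arithmetic check of that reach, with made-up addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uint32_t kDexCacheArrayLwOffset = 0x8000u;  // Bias from this header.
      const uint32_t arrays_begin = 0x12340000u;            // Made-up layout.

      // The patched code materializes this biased base in a register.
      const uint32_t base = arrays_begin + kDexCacheArrayLwOffset;

      // lw's displacement is a signed 16-bit immediate.
      const int32_t kMinDisp = -32768;
      const int32_t kMaxDisp = 32767;

      // Every element offset in [0, 0xFFFF] is reachable as base + displacement.
      assert(base + kMinDisp == arrays_begin);            // First element: disp = -0x8000.
      assert(base + kMaxDisp == arrays_begin + 0xFFFFu);  // Last reachable byte: disp = +0x7FFF.
      return 0;
    }
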
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
new file mode 100644
index 0000000..a16aaca
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class Mips32r6RelativePatcherTest : public RelativePatcherTest {
+ public:
+  Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
+
+ protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+};
+
+const uint8_t Mips32r6RelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+};
+const uint32_t Mips32r6RelativePatcherTest::LiteralOffset = 0;  // At auipc (where patching starts).
+const uint32_t Mips32r6RelativePatcherTest::AnchorOffset = 0;  // At auipc (where PC+0 points).
+const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                       uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
+  diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
+
+  const uint8_t expected_code[] = {
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                        uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
new file mode 100644
index 0000000..335ce2e
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class MipsRelativePatcherTest : public RelativePatcherTest {
+ public:
+  MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
+
+ protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+};
+
+const uint8_t MipsRelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x00, 0x00, 0x10, 0x04,  // nal
+    0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
+    0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+};
+const uint32_t MipsRelativePatcherTest::LiteralOffset = 4;  // At lui (where patching starts).
+const uint32_t MipsRelativePatcherTest::AnchorOffset = 8;  // At ori (where PC+0 points).
+const ArrayRef<const uint8_t> MipsRelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                   uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
+
+  const uint8_t expected_code[] = {
+      0x00, 0x00, 0x10, 0x04,
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
+      0x21, 0x90, 0x5F, 0x02,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                    uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(MipsRelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 3a22983..7765594 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -22,6 +22,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "linker/arm64/relative_patcher_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "linker/mips/relative_patcher_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "linker/x86/relative_patcher_x86.h"
 #endif
@@ -95,6 +98,11 @@
       return std::unique_ptr<RelativePatcher>(
           new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return std::unique_ptr<RelativePatcher>(
+          new MipsRelativePatcher(features->AsMipsInstructionSetFeatures()));
+#endif
     default:
       return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
   }
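MIPS is wired into the relative-patcher factory behind the same ART_ENABLE_CODEGEN_mips guard used for the other back ends; disabled or unsupported ISAs keep falling through to the no-op patcher. A stripped-down sketch of that dispatch shape, with placeholder names and a placeholder ENABLE_MIPS macro:

    #include <memory>

    enum class Isa { kArm, kArm64, kMips, kX86 };

    struct Patcher {
      virtual ~Patcher() {}
    };
    struct NoOpPatcher : Patcher {};   // Stand-in for RelativePatcherNone.
    #ifdef ENABLE_MIPS                 // Stand-in for ART_ENABLE_CODEGEN_mips.
    struct MipsPatcher : Patcher {};
    #endif

    std::unique_ptr<Patcher> CreatePatcher(Isa isa) {
      switch (isa) {
    #ifdef ENABLE_MIPS
        case Isa::kMips:
          return std::unique_ptr<Patcher>(new MipsPatcher());
    #endif
        default:
          // Disabled back ends and ISAs with no patching needs share the no-op patcher.
          return std::unique_ptr<Patcher>(new NoOpPatcher());
      }
    }

    int main() {
      return CreatePatcher(Isa::kMips) != nullptr ? 0 : 1;
    }
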
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index ec69107..d21f33e 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -98,6 +98,14 @@
         patches));
   }
 
+  uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) {
+    // We want to align the code rather than the preheader.
+    uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader);
+    uint32_t aligned_code_offset =
+        CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_);
+    return aligned_code_offset - unaligned_code_offset;
+  }
+
   void Link() {
     // Reserve space.
     static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset.");
@@ -106,9 +114,8 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      offset += alignment_size;
 
       offset += sizeof(OatQuickMethodHeader);
       uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
@@ -136,11 +143,10 @@
     for (auto& compiled_method : compiled_methods_) {
       offset = patcher_->WriteThunks(&out_, offset);
 
-      uint32_t aligned_offset = compiled_method->AlignCode(offset);
-      uint32_t aligned_code_delta = aligned_offset - offset;
-      CHECK_LE(aligned_code_delta, sizeof(kPadding));
-      out_.WriteFully(kPadding, aligned_code_delta);
-      offset += aligned_code_delta;
+      uint32_t alignment_size = CodeAlignmentSize(offset);
+      CHECK_LE(alignment_size, sizeof(kPadding));
+      out_.WriteFully(kPadding, alignment_size);
+      offset += alignment_size;
 
       out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
       offset += sizeof(OatQuickMethodHeader);
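The repeated "align, then add the header" logic in the test harness is folded into a CodeAlignmentSize() helper that aligns the code (the bytes after the OatQuickMethodHeader) rather than the header itself. A small numeric contrast of the old and new placement; header size 20 matches this patch, the alignment 16 is illustrative:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kAlignment = 16u;   // Illustrative; e.g. the ARM64 code alignment.
    constexpr uint32_t kHeaderSize = 20u;  // sizeof(OatQuickMethodHeader).

    constexpr uint32_t AlignCode(uint32_t offset) {
      return (offset + kAlignment - 1u) & ~(kAlignment - 1u);
    }

    // New scheme: pad so the code after the header is aligned.
    constexpr uint32_t CodeAlignmentSize(uint32_t header_offset) {
      uint32_t unaligned_code_offset = header_offset + kHeaderSize;
      return AlignCode(unaligned_code_offset) - unaligned_code_offset;
    }

    int main() {
      uint32_t offset = 100u;  // Current end of the previous method's code.

      // Old scheme: align the header, so the code starts at aligned + 20 (not aligned).
      uint32_t old_code_start = AlignCode(offset) + kHeaderSize;                   // 112 + 20 = 132.
      // New scheme: pad by CodeAlignmentSize, so the code itself is aligned.
      uint32_t new_code_start = offset + CodeAlignmentSize(offset) + kHeaderSize;  // 100 + 8 + 20 = 128.

      assert(old_code_start % kAlignment != 0u);
      assert(new_code_start % kAlignment == 0u);
      return 0;
    }
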
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 6d1f944..bf53bb2 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -16,6 +16,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
@@ -158,7 +159,7 @@
   }
 
   bool WriteElf(File* file,
-                ScopedFd&& zip_fd,
+                File&& zip_fd,
                 const char* location,
                 SafeMap<std::string, std::string>& key_value_store,
                 bool verify) {
@@ -444,7 +445,8 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+            sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -708,8 +710,8 @@
 
   {
     // Test using the AddZipDexFileSource() interface with the zip file handle.
-    ScopedFd zip_fd(dup(zip_file.GetFd()));
-    ASSERT_NE(-1, zip_fd.get());
+    File zip_fd(dup(zip_file.GetFd()), /* check_usage */ false);
+    ASSERT_NE(-1, zip_fd.Fd());
 
     ScratchFile oat_file;
     success = WriteElf(oat_file.GetFile(),
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index a02c024..8a80982 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/enums.h"
 #include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -86,6 +87,13 @@
   OatHeader* const oat_header_;
 };
 
+inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& compiled_method) {
+  // We want to align the code rather than the preheader.
+  uint32_t unaligned_code_offset = header_offset + sizeof(OatQuickMethodHeader);
+  uint32_t aligned_code_offset = compiled_method.AlignCode(unaligned_code_offset);
+  return aligned_code_offset - unaligned_code_offset;
+}
+
 }  // anonymous namespace
 
 // Defines the location of the raw dex file to write.
@@ -323,14 +331,14 @@
   DCHECK(write_state_ == WriteState::kAddingDexFileSources);
   uint32_t magic;
   std::string error_msg;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
     PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
     return false;
   } else if (IsDexMagic(magic)) {
     // The file is open for reading, not writing, so it's OK to let the File destructor
     // close it without checking for explicit Close(), so pass checkUsage = false.
-    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    raw_dex_files_.emplace_back(new File(fd.Release(), location, /* checkUsage */ false));
     oat_dex_files_.emplace_back(location,
                                 DexFileSource(raw_dex_files_.back().get()),
                                 create_type_lookup_table);
@@ -346,12 +354,12 @@
 }
 
 // Add dex file source(s) from a zip file specified by a file handle.
-bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+bool OatWriter::AddZippedDexFilesSource(File&& zip_fd,
                                         const char* location,
                                         CreateTypeLookupTable create_type_lookup_table) {
   DCHECK(write_state_ == WriteState::kAddingDexFileSources);
   std::string error_msg;
-  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.Release(), location, &error_msg));
   ZipArchive* zip_archive = zip_archives_.back().get();
   if (zip_archive == nullptr) {
     LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
@@ -506,7 +514,7 @@
   if (!HasBootImage()) {
     // Allocate space for app dex cache arrays in the .bss section.
     size_t bss_start = RoundUp(size_, kPageSize);
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
+    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
     for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
@@ -816,8 +824,8 @@
                               uint32_t thumb_offset) {
     offset_ = writer_->relative_patcher_->ReserveSpace(
         offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
-    offset_ = compiled_method->AlignCode(offset_);
-    DCHECK_ALIGNED_PARAM(offset_,
+    offset_ += CodeAlignmentSize(offset_, *compiled_method);
+    DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                          GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
     return offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
   }
@@ -941,7 +949,7 @@
   }
 
  protected:
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 };
 
 class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -1010,17 +1018,16 @@
           ReportWriteFailure("relative call thunk", it);
           return false;
         }
-        uint32_t aligned_offset = compiled_method->AlignCode(offset_);
-        uint32_t aligned_code_delta = aligned_offset - offset_;
-        if (aligned_code_delta != 0) {
-          if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+        uint32_t alignment_size = CodeAlignmentSize(offset_, *compiled_method);
+        if (alignment_size != 0) {
+          if (!writer_->WriteCodeAlignment(out, alignment_size)) {
             ReportWriteFailure("code alignment padding", it);
             return false;
           }
-          offset_ += aligned_code_delta;
+          offset_ += alignment_size;
           DCHECK_OFFSET_();
         }
-        DCHECK_ALIGNED_PARAM(offset_,
+        DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader),
                              GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
         DCHECK_EQ(method_offsets.code_offset_,
                   offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
@@ -1066,6 +1073,14 @@
                                                                      target_offset);
                 break;
               }
+              case LinkerPatch::Type::kTypeRelative: {
+                uint32_t target_offset = GetTargetObjectOffset(GetTargetType(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
               case LinkerPatch::Type::kCall: {
                 uint32_t target_offset = GetTargetOffset(patch);
                 PatchCodeAddress(&patched_code_, literal_offset, target_offset);
@@ -1141,7 +1156,8 @@
     if (UNLIKELY(target_offset == 0)) {
       ArtMethod* target = GetTargetMethod(patch);
       DCHECK(target != nullptr);
-      size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
+      PointerSize size =
+          GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
       const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size);
       if (oat_code_offset != 0) {
         DCHECK(!writer_->HasBootImage());
@@ -1158,17 +1174,28 @@
     return target_offset;
   }
 
-  mirror::Class* GetTargetType(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())
+  mirror::DexCache* GetDexCache(const DexFile* target_dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return (target_dex_file == dex_file_)
         ? dex_cache_
-        : class_linker_->FindDexCache(Thread::Current(), *patch.TargetTypeDexFile());
+        : class_linker_->FindDexCache(Thread::Current(), *target_dex_file);
+  }
+
+  mirror::Class* GetTargetType(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::DexCache* dex_cache = GetDexCache(patch.TargetTypeDexFile());
     mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
     CHECK(type != nullptr);
     return type;
   }
 
   mirror::String* GetTargetString(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::String* string = dex_cache_->GetResolvedString(patch.TargetStringIndex());
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
+    mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
+                                                  patch.TargetStringIndex(),
+                                                  dex_cache);
     DCHECK(string != nullptr);
     DCHECK(writer_->HasBootImage() ||
            Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
@@ -1438,8 +1465,8 @@
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field = compiler_driver_->Create ## fn_name(); \
-      offset += field->size();
+      (field) = compiler_driver_->Create ## fn_name(); \
+      offset += (field)->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
@@ -1559,8 +1586,8 @@
   if (kIsDebugBuild) {
     uint32_t size_total = 0;
     #define DO_STAT(x) \
-      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << x << "B)"; \
-      size_total += x;
+      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << (x) << "B)"; \
+      size_total += (x);
 
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
@@ -1716,12 +1743,12 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!out->WriteFully(field->data(), field->size())) { \
+        if (!out->WriteFully((field)->data(), (field)->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
-        size_ ## field += field->size(); \
-        relative_offset += alignment_padding + field->size(); \
+        size_ ## field += (field)->size(); \
+        relative_offset += alignment_padding + (field)->size(); \
         DCHECK_OFFSET(); \
       } while (false)
 
@@ -2181,6 +2208,7 @@
                                          oat_dex_file.dex_file_location_checksum_,
                                          /* oat_dex_file */ nullptr,
                                          verify,
+                                         verify,
                                          &error_msg));
     if (dex_files.back() == nullptr) {
       LOG(ERROR) << "Failed to open dex file from oat file. File: " << oat_dex_file.GetLocation()
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index cc81f39..decb7db 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -29,7 +29,6 @@
 #include "oat.h"
 #include "os.h"
 #include "safe_map.h"
-#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -132,7 +131,7 @@
       CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
   // Add dex file source(s) from a zip file specified by a file handle.
   bool AddZippedDexFilesSource(
-      ScopedFd&& zip_fd,
+      File&& zip_fd,
       const char* location,
       CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
   // Add dex file source from raw memory.
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index f0c4eaf..8aefd9e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -533,9 +533,6 @@
         first_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        dynamic_bce_standby_(
-            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        record_dynamic_bce_standby_(true),
         early_exit_loop_(
             std::less<uint32_t>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
@@ -560,14 +557,6 @@
   }
 
   void Finish() {
-    // Retry dynamic bce candidates on standby that are still in the graph.
-    record_dynamic_bce_standby_ = false;
-    for (HBoundsCheck* bounds_check : dynamic_bce_standby_) {
-      if (bounds_check->IsInBlock()) {
-        TryDynamicBCE(bounds_check);
-      }
-    }
-
     // Preserve SSA structure which may have been broken by adding one or more
     // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
     InsertPhiNodes();
@@ -576,7 +565,6 @@
     early_exit_loop_.clear();
     taken_test_loop_.clear();
     finite_loop_.clear();
-    dynamic_bce_standby_.clear();
   }
 
  private:
@@ -800,6 +788,27 @@
             ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
+    } else if (cond == kCondNE || cond == kCondEQ) {
+      if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) {
+        // Special case:
+        //   length == [c,d] yields [c, d] along true
+        //   length != [c,d] yields [c, d] along false
+        if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), lower, upper);
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
+        }
+        // In addition:
+        //   length == 0 yields [1, max] along false
+        //   length != 0 yields [1, max] along true
+        if (lower.GetConstant() == 0 && upper.GetConstant() == 0) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), ValueBound(nullptr, 1), ValueBound::Max());
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? false_successor : true_successor, new_range);
+        }
+      }
     }
   }
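
The new kCondEQ/kCondNE case above narrows the known range of an array length along the branch edges. ART operates on HIR built from dex code, so the following C-style snippet is only an illustrative analogue of the source pattern it targets:

int FirstOrZero(const int* a, int length) {
  // `length` stands in for a.length of a Java array.
  if (length != 0) {
    // Along this edge, length is known to lie in [1, max], so the bounds
    // check for index 0 can be removed statically.
    return a[0];
  }
  return 0;
}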
 
@@ -811,7 +820,6 @@
            array_length->IsArrayLength() ||
            array_length->IsPhi());
     bool try_dynamic_bce = true;
-
     // Analyze index range.
     if (!index->IsIntConstant()) {
       // Non-constant index.
@@ -826,7 +834,7 @@
       }
       // Try index range obtained by induction variable analysis.
       // Disables dynamic bce if OOB is certain.
-      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+      if (InductionRangeFitsIn(&array_range, bounds_check, &try_dynamic_bce)) {
         ReplaceInstruction(bounds_check, index);
         return;
       }
@@ -875,10 +883,20 @@
     // If static analysis fails, and OOB is not certain, try dynamic elimination.
     if (try_dynamic_bce) {
       // Try loop-based dynamic elimination.
-      if (TryDynamicBCE(bounds_check)) {
+      HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+      bool needs_finite_test = false;
+      bool needs_taken_test = false;
+      if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+          induction_range_.CanGenerateCode(
+              bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+          CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+          // Do this test last, since it may generate code.
+          CanHandleLength(loop, array_length, needs_taken_test)) {
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDynamicBCE(loop, bounds_check);
         return;
       }
-      // Prepare dominator-based dynamic elimination.
+      // Otherwise, prepare dominator-based dynamic elimination.
       if (first_index_bounds_check_map_.find(array_length->GetId()) ==
           first_index_bounds_check_map_.end()) {
         // Remember the first bounds check against each array_length. That bounds check
@@ -891,14 +909,15 @@
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
     DCHECK(phi->IsLoopHeaderPhi());
+    HConstInputsRef inputs = phi->GetInputs();
     // Start with input 1. Input 0 is from the incoming block.
-    HInstruction* input1 = phi->InputAt(1);
+    const HInstruction* input1 = inputs[1];
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
         *phi->GetBlock()->GetPredecessors()[1]));
-    for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+    for (size_t i = 2; i < inputs.size(); ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
           *phi->GetBlock()->GetPredecessors()[i]));
-      if (input1 != phi->InputAt(i)) {
+      if (input1 != inputs[i]) {
         return false;
       }
     }
@@ -955,13 +974,7 @@
   void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
-      IfCondition cmp = cond->GetCondition();
-      if (cmp == kCondGT || cmp == kCondGE ||
-          cmp == kCondLT || cmp == kCondLE) {
-        HInstruction* left = cond->GetLeft();
-        HInstruction* right = cond->GetRight();
-        HandleIf(instruction, left, right, cmp);
-      }
+      HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition());
     }
   }
 
@@ -1164,7 +1177,7 @@
     }
   }
 
-  // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+  /** Performs dominator-based dynamic elimination on a suitable set of bounds checks. */
   void AddCompareWithDeoptimization(HBasicBlock* block,
                                     HInstruction* array_length,
                                     HInstruction* base,
@@ -1174,6 +1187,12 @@
     // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
     // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
     // and base+3, since we made the assumption any in between value may occur too.
+    // In code, using unsigned comparisons:
+    // (1) constants only
+    //       if (max_c >= a.length) deoptimize;
+    // (2) general case
+    //       if (base-min_c >  base+max_c) deoptimize;
+    //       if (base+max_c >= a.length  ) deoptimize;
     static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
                   "Incorrect max length may be subject to arithmetic wrap-around");
     HInstruction* upper = GetGraph()->GetIntConstant(max_c);
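
A hedged standalone sketch of the unsigned-comparison tests listed in the comment above (cases (1) and (2)); here min_c may be negative, so base + min_c is the lowest access. This is an analogue of the HAbove/HAboveOrEqual deoptimization guards, not the pass itself:

#include <cstdint>

// (1) Constants only: one unsigned test guards every access a[0]..a[max_c].
bool WouldDeoptConstantsOnly(int32_t max_c, int32_t length) {
  return static_cast<uint32_t>(max_c) >= static_cast<uint32_t>(length);
}

// (2) General case: all of a[base+min_c]..a[base+max_c] must be in bounds.
// The first test also rejects arithmetic wrap-around, since wrap-around makes
// the lower access compare greater than the upper one as an unsigned value.
bool WouldDeoptGeneral(int32_t base, int32_t min_c, int32_t max_c, int32_t length) {
  uint32_t lower = static_cast<uint32_t>(base + min_c);
  uint32_t upper = static_cast<uint32_t>(base + max_c);
  return lower > upper || upper >= static_cast<uint32_t>(length);
}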
@@ -1192,7 +1211,7 @@
     has_dom_based_dynamic_bce_ = true;
   }
 
-  // Attempt dominator-based dynamic elimination on remaining candidates.
+  /** Attempts dominator-based dynamic elimination on remaining candidates. */
   void AddComparesWithDeoptimization(HBasicBlock* block) {
     for (const auto& entry : first_index_bounds_check_map_) {
       HBoundsCheck* bounds_check = entry.second;
@@ -1256,17 +1275,19 @@
           candidates.push_back(other_bounds_check);
         }
       }
-      // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
-      // where the distance min_c:max_c range gets close to the maximum possible array length,
-      // since those cases are likely to always deopt (such situations do not necessarily go
-      // OOB, though, since the programmer could rely on wrap-around from max to min).
+      // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+      // bounds checks and replace these with deopt checks that guard against any possible
+      // OOB. Note that we reject cases where the distance min_c:max_c range gets close to
+      // the maximum possible array length, since those cases are likely to always deopt
+      // (such situations do not necessarily go OOB, though, since the array could be really
+      // large, or the programmer could rely on arithmetic wrap-around from max to min).
       size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1);  // extra test?
       uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
       if (candidates.size() >= threshold &&
           (base != nullptr || min_c >= 0) &&  // reject certain OOB
            distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
         AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
-        for (HInstruction* other_bounds_check : candidates) {
+        for (HBoundsCheck* other_bounds_check : candidates) {
           // Only replace if still in the graph. This avoids visiting the same
           // bounds check twice if it occurred multiple times in the use list.
           if (other_bounds_check->IsInBlock()) {
@@ -1283,77 +1304,156 @@
    * parameter try_dynamic_bce is set to false if OOB is certain.
    */
   bool InductionRangeFitsIn(ValueRange* array_range,
-                            HInstruction* context,
-                            HInstruction* index,
+                            HBoundsCheck* context,
                             bool* try_dynamic_bce) {
     InductionVarRange::Value v1;
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
-    if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) {
-      do {
-        if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-            v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-          DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-          DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-          ValueRange index_range(GetGraph()->GetArena(),
-                                 ValueBound(v1.instruction, v1.b_constant),
-                                 ValueBound(v2.instruction, v2.b_constant));
-          // If analysis reveals a certain OOB, disable dynamic BCE.
-          if (index_range.GetLower().LessThan(array_range->GetLower()) ||
-              index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
-            *try_dynamic_bce = false;
-            return false;
-          }
-          // Use analysis for static bce only if loop is finite.
-          if (!needs_finite_test && index_range.FitsIn(array_range)) {
-            return true;
-          }
+    HInstruction* index = context->InputAt(0);
+    HInstruction* hint = ValueBound::HuntForDeclaration(context->InputAt(1));
+    if (induction_range_.GetInductionRange(context, index, hint, &v1, &v2, &needs_finite_test)) {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE. Otherwise,
+        // use analysis for static bce only if loop is finite.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+        } else if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
         }
-      } while (induction_range_.RefineOuter(&v1, &v2));
+      }
     }
     return false;
   }
 
   /**
-   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
-   * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
-   * will go out of range (we want to be rather certain of that given the slowdown of
-   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
-   * bounds checks and related null checks removed.
+   * Performs loop-based dynamic elimination on a bounds check. In order to minimize the
+   * number of eventually generated tests, related bounds checks with tests that can be
+   * combined with tests for the given bounds check are collected first.
    */
-  bool TryDynamicBCE(HBoundsCheck* instruction) {
-    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
-    HInstruction* index = instruction->InputAt(0);
-    HInstruction* length = instruction->InputAt(1);
-    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
-    bool needs_finite_test = false;
-    bool needs_taken_test = false;
-    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
-        induction_range_.CanGenerateCode(
-            instruction, index, &needs_finite_test, &needs_taken_test) &&
-        CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) &&
-        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
-      HInstruction* lower = nullptr;
-      HInstruction* upper = nullptr;
-      // Generate the following unsigned comparisons
-      //     if (lower > upper)   deoptimize;
-      //     if (upper >= length) deoptimize;
-      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
-      //     if (upper >= length) deoptimize;
-      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
-      // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
-      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
-      TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
-      HBasicBlock* block = GetPreHeader(loop, instruction);
-      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
-      if (lower != nullptr) {
-        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+  void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+    HInstruction* index = bounds_check->InputAt(0);
+    HInstruction* array_length = bounds_check->InputAt(1);
+    DCHECK(loop->IsDefinedOutOfTheLoop(array_length));  // pre-checked
+    DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+    // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+    // for a base instruction (possibly absent) and various constants.
+    ValueBound value = ValueBound::AsValueBound(index);
+    HInstruction* base = value.GetInstruction();
+    int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+    int32_t max_c = value.GetConstant();
+    ArenaVector<HBoundsCheck*> candidates(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    ArenaVector<HBoundsCheck*> standby(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+        HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        HInstruction* other_array_length = other_bounds_check->InputAt(1);
+        ValueBound other_value = ValueBound::AsValueBound(other_index);
+        int32_t other_c = other_value.GetConstant();
+        if (array_length == other_array_length && base == other_value.GetInstruction()) {
+          // Does the current basic block dominate all back edges? If not,
+          // add this candidate later only if it falls into the range.
+          if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+            standby.push_back(other_bounds_check);
+            continue;
+          }
+          min_c = std::min(min_c, other_c);
+          max_c = std::max(max_c, other_c);
+          candidates.push_back(other_bounds_check);
+        }
       }
-      InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
-      ReplaceInstruction(instruction, index);
-      return true;
     }
-    return false;
+    // Add standby candidates that fall in selected range.
+    for (HBoundsCheck* other_bounds_check : standby) {
+      HInstruction* other_index = other_bounds_check->InputAt(0);
+      int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+      if (min_c <= other_c && other_c <= max_c) {
+        candidates.push_back(other_bounds_check);
+      }
+    }
+    // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+    // checks and replace these with deopt checks that guard against any possible OOB.
+    DCHECK_LT(0u, candidates.size());
+    uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+    if ((base != nullptr || min_c >= 0) &&  // reject certain OOB
+        distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+      HBasicBlock* block = GetPreHeader(loop, bounds_check);
+      HInstruction* min_lower = nullptr;
+      HInstruction* min_upper = nullptr;
+      HInstruction* max_lower = nullptr;
+      HInstruction* max_upper = nullptr;
+      // Iterate over all bounds checks.
+      for (HBoundsCheck* other_bounds_check : candidates) {
+        // Only handle if still in the graph. This avoids visiting the same
+        // bounds check twice if it occurred multiple times in the use list.
+        if (other_bounds_check->IsInBlock()) {
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+          // Generate code for either the maximum or minimum. Range analysis was already queried
+          // whether code generation on the original and, thus, related bounds check was possible.
+          // It handles either loop invariants (lower is not set) or unit strides.
+          if (other_c == max_c) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+          } else if (other_c == min_c && base != nullptr) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+          }
+          ReplaceInstruction(other_bounds_check, other_index);
+        }
+      }
+      // In code, using unsigned comparisons:
+      // (1) constants only
+      //       if (max_upper >= a.length ) deoptimize;
+      // (2) two symbolic invariants
+      //       if (min_upper >  max_upper) deoptimize;   unless min_c == max_c
+      //       if (max_upper >= a.length ) deoptimize;
+      // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+      //       if (min_lower >  max_lower) deoptimize;   unless min_c == max_c
+      //       if (max_lower >  max_upper) deoptimize;
+      //       if (max_upper >= a.length ) deoptimize;
+      if (base == nullptr) {
+        // Constants only.
+        DCHECK_GE(min_c, 0);
+        DCHECK(min_lower == nullptr && min_upper == nullptr &&
+               max_lower == nullptr && max_upper != nullptr);
+      } else if (max_lower == nullptr) {
+        // Two symbolic invariants.
+        if (min_c != max_c) {
+          DCHECK(min_lower == nullptr && min_upper != nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+        }
+      } else {
+        // General case, unit strides.
+        if (min_c != max_c) {
+          DCHECK(min_lower != nullptr && min_upper != nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+        }
+        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+      }
+      InsertDeoptInLoop(
+          loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+    } else {
+      // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
+    }
   }
 
   /**
@@ -1461,8 +1561,7 @@
    * of the loop to use, dynamic bce in such cases is only allowed if other tests
    * ensure the loop is finite.
    */
-  bool CanHandleInfiniteLoop(
-      HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) {
+  bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
     if (needs_infinite_test) {
       // If we already forced the loop to be finite, allow directly.
       const uint32_t loop_id = loop->GetHeader()->GetBlockId();
@@ -1484,11 +1583,6 @@
           }
         }
       }
-      // If bounds check made it this far, it is worthwhile to check later if
-      // the loop was forced finite by another candidate.
-      if (record_dynamic_bce_standby_) {
-        dynamic_bce_standby_.push_back(check);
-      }
       return false;
     }
     return true;
@@ -1714,10 +1808,6 @@
   // in a block that checks an index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
 
-  // Stand by list for dynamic bce.
-  ArenaVector<HBoundsCheck*> dynamic_bce_standby_;
-  bool record_dynamic_bce_standby_;
-
   // Early-exit loop bookkeeping.
   ArenaSafeMap<uint32_t, bool> early_exit_loop_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e7fa4e4..c532e72 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -50,6 +50,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "mirror/string.h"
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
@@ -110,10 +111,10 @@
         << " " << locations->Out();
   }
 
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    DCHECK(CheckType(instruction->InputAt(i)->GetType(), locations->InAt(i)))
-      << instruction->InputAt(i)->GetType()
-      << " " << locations->InAt(i);
+  HConstInputsRef inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i)))
+      << inputs[i]->GetType() << " " << locations->InAt(i);
   }
 
   HEnvironment* environment = instruction->GetEnvironment();
@@ -136,7 +137,20 @@
 
 size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
   auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
-  return pointer_size * index;
+  return static_cast<size_t>(pointer_size) * index;
+}
+
+uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) {
+  return array_length->IsStringLength()
+      ? mirror::String::CountOffset().Uint32Value()
+      : mirror::Array::LengthOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) {
+  DCHECK(array_get->GetType() == Primitive::kPrimChar || !array_get->IsStringCharAt());
+  return array_get->IsStringCharAt()
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(Primitive::ComponentSize(array_get->GetType())).Uint32Value();
 }
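
The two helpers above let the same HIR nodes serve both plain arrays and the String intrinsics: an HArrayLength lowered from the String length intrinsic reads String's `count` field, and an HArrayGet used by String.charAt() indexes the `value` payload. A compact sketch of that selection, with placeholder offsets rather than the real mirror:: values:

#include <cstdint>

constexpr uint32_t kArrayLengthOffset = 8u;   // stand-in for mirror::Array::LengthOffset()
constexpr uint32_t kStringCountOffset = 8u;   // stand-in for mirror::String::CountOffset()

// HArrayLength marked as a string length reads `count`; everything else keeps
// the normal array length field.
uint32_t LengthFieldOffset(bool is_string_length) {
  return is_string_length ? kStringCountOffset : kArrayLengthOffset;
}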
 
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
@@ -277,7 +291,8 @@
   DCHECK(!block_order.empty());
   DCHECK(block_order[0] == GetGraph()->GetEntryBlock());
   ComputeSpillMask();
-  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+  first_register_slot_in_slow_path_ = RoundUp(
+      (number_of_out_slots + number_of_spill_slots) * kVRegSize, GetPreferredSlotsAlignment());
 
   if (number_of_spill_slots == 0
       && !HasAllocatedCalleeSaveRegisters()
@@ -288,8 +303,7 @@
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
-        number_of_spill_slots * kVRegSize
-        + number_of_out_slots * kVRegSize
+        first_register_slot_in_slow_path_
         + maximum_number_of_live_core_registers * GetWordSize()
         + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
         + FrameEntrySpillSize(),
@@ -300,7 +314,8 @@
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
-  LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnMainOnly);
 
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
@@ -364,7 +379,7 @@
 
   ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
   LocationSummary* locations =
-      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+      new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
 
   locations->AddTemp(calling_convention.GetFieldIndexLocation());
 
@@ -485,7 +500,7 @@
                                                    bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
           cls->CanCallRuntime())
             ? LocationSummary::kCallOnSlowPath
@@ -750,16 +765,24 @@
   LocationSummary* locations = instruction->GetLocations();
 
   uint32_t register_mask = locations->GetRegisterMask();
-  if (locations->OnlyCallsOnSlowPath()) {
-    // In case of slow path, we currently set the location of caller-save registers
-    // to register (instead of their stack location when pushed before the slow-path
-    // call). Therefore register_mask contains both callee-save and caller-save
-    // registers that hold objects. We must remove the caller-save from the mask, since
-    // they will be overwritten by the callee.
-    register_mask &= core_callee_save_mask_;
+  if (instruction->IsSuspendCheck()) {
+    // Suspend check has special ABI that saves the caller-save registers in callee,
+    // so we want to emit stack maps containing the registers.
+    // TODO: Register allocator still reserves space for the caller-save registers.
+    // We should add slow-path-specific caller-save information into LocationSummary
+    // and refactor the code here as well as in the register allocator to use it.
+  } else {
+    if (locations->OnlyCallsOnSlowPath()) {
+      // In case of slow path, we currently set the location of caller-save registers
+      // to register (instead of their stack location when pushed before the slow-path
+      // call). Therefore register_mask contains both callee-save and caller-save
+      // registers that hold objects. We must remove the caller-save from the mask, since
+      // they will be overwritten by the callee.
+      register_mask &= core_callee_save_mask_;
+    }
+    // The register mask must be a subset of callee-save registers.
+    DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
-  // The register mask must be a subset of callee-save registers.
-  DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
@@ -1159,23 +1182,23 @@
         << "instruction->DebugName()=" << instruction->DebugName()
         << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
   } else {
-    DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
+    DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal())
         << "instruction->DebugName()=" << instruction->DebugName()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
-           // When read barriers are enabled, some instructions use a
-           // slow path to emit a read barrier, which does not trigger
-           // GC, is not fatal, nor is emitted by HDeoptimize
-           // instructions.
+           // When (non-Baker) read barriers are enabled, some instructions
+           // use a slow path to emit a read barrier, which does not trigger
+           // GC.
            (kEmitCompilerReadBarrier &&
+            !kUseBakerReadBarrier &&
             (instruction->IsInstanceFieldGet() ||
              instruction->IsStaticFieldGet() ||
-             instruction->IsArraySet() ||
              instruction->IsArrayGet() ||
              instruction->IsLoadClass() ||
              instruction->IsLoadString() ||
              instruction->IsInstanceOf() ||
-             instruction->IsCheckCast())))
+             instruction->IsCheckCast() ||
+             (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
         << "instruction->DebugName()=" << instruction->DebugName()
         << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
@@ -1189,6 +1212,28 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                                SlowPathCode* slow_path) {
+  DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+  // Only the Baker read barrier marking slow path used by certain
+  // instructions is expected to invoke the runtime without recording
+  // PC-related information.
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK(instruction->IsInstanceFieldGet() ||
+         instruction->IsStaticFieldGet() ||
+         instruction->IsArrayGet() ||
+         instruction->IsLoadClass() ||
+         instruction->IsLoadString() ||
+         instruction->IsInstanceOf() ||
+         instruction->IsCheckCast() ||
+         (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+         (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
@@ -1298,4 +1343,18 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index d69c410..fd396c4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/bit_field.h"
+#include "base/enums.h"
 #include "compiled_method.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
@@ -80,7 +81,11 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
 
+  // Save live core and floating-point caller-save registers and
+  // update the stack mask in `locations` for registers holding object
+  // references.
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+  // Restore live core and floating-point caller-save registers.
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
 
   bool IsCoreRegisterSaved(int reg) const {
@@ -187,7 +192,7 @@
   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
     // Note that this follows the current calling convention.
     return GetFrameSize()
-        + InstructionSetPointerSize(GetInstructionSet())  // Art method
+        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
         + parameter->GetIndex() * kVRegSize;
   }
 
@@ -211,6 +216,8 @@
                                 size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
+  // Backends can override this as necessary. For most, no special alignment is required.
+  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -333,6 +340,9 @@
   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
 
+  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
+  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
+
   // Helper that returns the pointer offset of an index in an object array.
   // Note: this method assumes we always have the same pointer size, regardless
   // of the architecture.
@@ -340,6 +350,27 @@
   // Pointer variant for ArtMethod and ArtField arrays.
   size_t GetCachePointerOffset(uint32_t index);
 
+  // Helper that returns the offset of the array's length field.
+  // Note: Besides the normal arrays, we also use the HArrayLength for
+  // accessing the String's `count` field in String intrinsics.
+  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
+
+  // Helper that returns the offset of the array's data.
+  // Note: Besides the normal arrays, we also use the HArrayGet for
+  // accessing the String's `value` field in String intrinsics.
+  static uint32_t GetArrayDataOffset(HArrayGet* array_get);
+
+  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
+  template <PointerSize pointer_size>
+  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
+    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
+    DCHECK_LT(reg, 30u);
+    // The ReadBarrierMarkRegX entry points are ordered by increasing
+    // register number in Thread::tls_Ptr_.quick_entrypoints.
+    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+        + static_cast<size_t>(pointer_size) * reg;
+  }
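
A worked sketch of the offset arithmetic above: the ReadBarrierMarkRegX entry points are laid out consecutively in the thread's entry-point table, so register X maps to the offset of Reg00 plus X pointer-sized slots. The base offset below is made up purely for illustration:

#include <cstddef>
#include <cstdint>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };

int32_t ReadBarrierMarkEntryPointOffset(PointerSize pointer_size, size_t reg) {
  constexpr int32_t kReg00Offset = 0x1A0;  // hypothetical offset of pReadBarrierMarkReg00
  // reg must be below 30, mirroring the DCHECK in the real helper.
  return kReg00Offset + static_cast<int32_t>(static_cast<size_t>(pointer_size) * reg);
}
// Example: 32-bit target, register 5 -> 0x1A0 + 4 * 5 == 0x1B4.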
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -353,8 +384,14 @@
     return type == Primitive::kPrimNot && !value->IsNullConstant();
   }
 
+
+  // Performs checks pertaining to an InvokeRuntime call.
   void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
 
+  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                          SlowPathCode* slow_path);
+
   void AddAllocatedRegister(Location location) {
     allocated_registers_.Add(location);
   }
@@ -444,10 +481,15 @@
                              SlowPathCode* slow_path) = 0;
 
   // Check if the desired_string_load_kind is supported. If it is, return it,
-  // otherwise return a fall-back info that should be used instead.
+  // otherwise return a fall-back kind that should be used instead.
   virtual HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) = 0;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) = 0;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -464,6 +506,9 @@
 
   virtual void GenerateNop() = 0;
 
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
@@ -488,6 +533,20 @@
     LabelType label;
   };
 
+  // Type patch info used for recording locations of required linker patches and
+  // target types. The actual type address can be absolute or PC-relative.
+  // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these
+  // classes contain the dex file, some index and the label.
+  template <typename LabelType>
+  struct TypePatchInfo {
+    TypePatchInfo(const DexFile& df, uint32_t index)
+        : dex_file(df), type_index(index), label() { }
+
+    const DexFile& dex_file;
+    uint32_t type_index;
+    LabelType label;
+  };
+
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
@@ -645,7 +704,7 @@
                     size_t number_of_registers,
                     const F* fpu_registers,
                     size_t number_of_fpu_registers,
-                    size_t pointer_size)
+                    PointerSize pointer_size)
       : registers_(registers),
         number_of_registers_(number_of_registers),
         fpu_registers_(fpu_registers),
@@ -668,7 +727,7 @@
   size_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
     // Add space for the method pointer.
-    return pointer_size_ + index * kVRegSize;
+    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
   }
 
  private:
@@ -676,7 +735,7 @@
   const size_t number_of_registers_;
   const F* fpu_registers_;
   const size_t number_of_fpu_registers_;
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 45e9b58..6d9c55c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,8 +59,9 @@
 
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
 
 class NullCheckSlowPathARM : public SlowPathCode {
  public:
@@ -118,11 +119,9 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
     } else {
@@ -175,8 +174,11 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    arm_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -312,7 +314,7 @@
                                  instruction_->GetDexPc(),
                                  this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -408,8 +410,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -417,38 +419,55 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, SP);
+    DCHECK_NE(reg, LR);
+    DCHECK_NE(reg, PC);
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, so it cannot be the entry point's input/output.
+    DCHECK_NE(reg, IP);
+    DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   R0 <- obj
+    //   R0 <- ReadBarrierMark(R0)
+    //   obj <- R0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ b(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
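Editorial note on the "compact" slow path above: the marking entrypoint is selected purely by the number of the register holding the reference. The sketch below is a minimal stand-alone C++ model of that selection; the table base, the pointer size constant and the function name are assumptions for illustration only, not ART's actual Thread layout or API.

```cpp
#include <cassert>
#include <cstdint>

// Minimal sketch, NOT ART code: assume the ReadBarrierMarkRegX entrypoints
// form a contiguous table indexed by core register number, so the offset
// used by "LDR LR, [TR, #offset]; BLX LR" is base + reg * pointer_size.
constexpr int32_t kAssumedMarkReg00Offset = 0x200;  // hypothetical table base
constexpr int32_t kArmPointerSizeBytes = 4;

int32_t ReadBarrierMarkEntryPointOffsetSketch(int reg) {
  assert(reg >= 0 && reg < 16);  // the caller additionally excludes SP, LR, PC and IP
  return kAssumedMarkReg00Offset + reg * kArmPointerSizeBytes;
}

int main() {
  // e.g. marking a reference held in r5:
  assert(ReadBarrierMarkEntryPointOffsetSketch(5) == 0x200 + 5 * 4);
  return 0;
}
```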
@@ -489,9 +508,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()))
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -503,7 +525,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -551,7 +573,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddConstant(index_reg, index_reg, offset_);
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not scaled (shifted left by 2, as it
+        // is for ArrayGet's reference elements), because it is already
+        // a byte offset to a field within the object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -674,7 +700,8 @@
 };
 
 #undef __
-#define __ down_cast<ArmAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
 
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
@@ -790,6 +817,9 @@
       boot_image_string_patches_(StringReferenceValueComparator(),
                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
@@ -938,7 +968,7 @@
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpops(start_register, POPCOUNT(fpu_spill_mask_));
-    __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+    __ cfi().AdjustCFAOffset(-static_cast<int>(kArmPointerSize) * POPCOUNT(fpu_spill_mask_));
     __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
   }
   // Pop LR into PC to return.
@@ -1190,7 +1220,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kArmWordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1206,6 +1236,14 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+  __ blx(LR);
+}
+
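For readers unfamiliar with the "LDR LR, [TR, #offset]; BLX LR" sequence emitted above, the snippet below models it in plain C++. The FakeThread type, its entrypoint table and the offsets are made-up assumptions for illustration; they are not ART's Thread layout.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Toy model of an entrypoint call through the thread register: the runtime
// keeps function pointers at fixed offsets from the thread object, and the
// generated code loads one of them and branches to it.
struct FakeThread {
  void (*entrypoints[4])();  // hypothetical entrypoint table
};

static int g_calls = 0;
static void FakeEntryPoint() { ++g_calls; }

int main() {
  FakeThread thread{};
  thread.entrypoints[2] = &FakeEntryPoint;
  size_t entry_point_offset =
      offsetof(FakeThread, entrypoints) + 2 * sizeof(thread.entrypoints[0]);
  // "LDR LR, [TR, #entry_point_offset]; BLX LR" modelled in C++:
  void (*target)();
  std::memcpy(&target, reinterpret_cast<char*>(&thread) + entry_point_offset, sizeof(target));
  target();
  assert(g_calls == 1);
  return 0;
}
```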
 void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
@@ -1253,6 +1291,44 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
+  Primitive::Type type = instruction->InputAt(0)->GetType();
+  Location lhs_loc = instruction->GetLocations()->InAt(0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+    if (type == Primitive::kPrimFloat) {
+      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  } else {
+    if (type == Primitive::kPrimFloat) {
+      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  }
+}
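The signed-zero caveat in the comment above is easy to check in isolation. The snippet below is a stand-alone illustration (plain C++, not ART code) of why folding either +0.0 or -0.0 into the compare-with-zero form is safe for ordinary comparisons, while sign-distinguishing helpers such as Float.compare could not be lowered that way.

```cpp
#include <cassert>
#include <cmath>

int main() {
  float pos_zero = 0.0f;
  float neg_zero = -0.0f;
  assert(pos_zero == neg_zero);      // comparison semantics: +0.0 and -0.0 are equal
  assert(!std::signbit(pos_zero));   // yet the two values are distinguishable by sign bit,
  assert(std::signbit(neg_zero));    // which is what Float.compare & friends look at
  return 0;
}
```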
+
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
                                                   Label* false_label ATTRIBUTE_UNUSED) {
@@ -1353,22 +1429,14 @@
   Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
-  LocationSummary* locations = condition->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong:
       GenerateLongComparesAndJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(condition, true_target, false_target);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(condition);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
@@ -1549,7 +1617,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
@@ -1596,12 +1664,8 @@
       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimFloat:
-      __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      GenerateFPJumps(cond, &true_label, &false_label);
-      break;
     case Primitive::kPrimDouble:
-      __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      GenerateVcmp(cond);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -1899,11 +1963,11 @@
   __ LoadFromOffset(kLoadWord, temp, temp,
         mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
+      invoke->GetImtIndex(), kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
-      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
@@ -1996,7 +2060,7 @@
       (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
         && result_type == Primitive::kPrimLong)
        || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -2274,8 +2338,7 @@
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
           SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          __ vmovs(temp, in.AsFpuRegister<SRegister>());
-          __ vcvtis(temp, temp);
+          __ vcvtis(temp, in.AsFpuRegister<SRegister>());
           __ vmovrs(out.AsRegister<Register>(), temp);
           break;
         }
@@ -2283,9 +2346,7 @@
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
           SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp_d = FromLowSToD(temp_s);
-          __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
-          __ vcvtid(temp_s, temp_d);
+          __ vcvtid(temp_s, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
           __ vmovrs(out.AsRegister<Register>(), temp_s);
           break;
         }
@@ -2464,7 +2525,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2501,13 +2562,18 @@
       break;
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ adds(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ adc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ adds(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ adc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -2541,7 +2607,7 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2577,13 +2643,18 @@
     }
 
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ subs(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ sbc(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, -value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ subs(out.AsRegisterPairLow<Register>(),
+                first.AsRegisterPairLow<Register>(),
+                ShifterOperand(second.AsRegisterPairLow<Register>()));
+        __ sbc(out.AsRegisterPairHigh<Register>(),
+               first.AsRegisterPairHigh<Register>(),
+               ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      }
       break;
     }
 
@@ -2818,13 +2889,13 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2943,7 +3014,7 @@
   Primitive::Type type = rem->GetResultType();
 
   // Most remainders are implemented in the runtime.
-  LocationSummary::CallKind call_kind = LocationSummary::kCall;
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
   if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
     call_kind = LocationSummary::kNoCall;
@@ -3480,7 +3551,7 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   } else {
@@ -3497,7 +3568,7 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
     __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadWord, LR, temp, code_offset.Int32Value());
     __ blx(LR);
@@ -3513,7 +3584,7 @@
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(R0));
@@ -3621,7 +3692,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -3666,12 +3737,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ LoadImmediate(out, 0);
-      if (type == Primitive::kPrimFloat) {
-        __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
-      } else {
-        __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
-                 FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
-      }
+      GenerateVcmp(compare);
       __ vmstat();  // transfer FP status register to ARM APSR.
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
@@ -3697,7 +3763,7 @@
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3965,6 +4031,17 @@
   }
 }
 
+Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+  DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat)
+      << input->GetType();
+  if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+      (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+    return Location::ConstantLocation(input->AsConstant());
+  } else {
+    return Location::RequiresFpuRegister();
+  }
+}
+
 Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
                                                              Opcode opcode) {
   DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
@@ -3979,31 +4056,51 @@
                                                        Opcode opcode) {
   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
   if (Primitive::Is64BitType(input_cst->GetType())) {
-    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
-        CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+    Opcode high_opcode = opcode;
+    SetCc low_set_cc = kCcDontCare;
+    switch (opcode) {
+      case SUB:
+        // Flip the operation to an ADD.
+        value = -value;
+        opcode = ADD;
+        FALLTHROUGH_INTENDED;
+      case ADD:
+        if (Low32Bits(value) == 0u) {
+          return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+        }
+        high_opcode = ADC;
+        low_set_cc = kCcSet;
+        break;
+      default:
+        break;
+    }
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
   } else {
     return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
   }
 }
 
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+                                                       Opcode opcode,
+                                                       SetCc set_cc) {
   ShifterOperand so;
   ArmAssembler* assembler = codegen_->GetAssembler();
-  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
     return true;
   }
   Opcode neg_opcode = kNoOperand;
   switch (opcode) {
-    case AND:
-      neg_opcode = BIC;
-      break;
-    case ORR:
-      neg_opcode = ORN;
-      break;
+    case AND: neg_opcode = BIC; value = ~value; break;
+    case ORR: neg_opcode = ORN; value = ~value; break;
+    case ADD: neg_opcode = SUB; value = -value; break;
+    case ADC: neg_opcode = SBC; value = ~value; break;
+    case SUB: neg_opcode = ADD; value = -value; break;
+    case SBC: neg_opcode = ADC; value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
 }
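The opcode/immediate flipping above (SUB to ADD with a negated constant, ADC/SBC with a complemented one) relies on two small identities. The stand-alone check below verifies them on a few values in plain C++; it is illustrative only and not part of the compiler.

```cpp
#include <cassert>
#include <cstdint>

// ADD x, c == SUB x, -c (mod 2^32), and with the same carry-in
// ADC x, c == SBC x, ~c, because ~c == -c - 1 and SBC computes
// x - operand - (1 - carry).
int main() {
  const uint32_t xs[] = {0u, 1u, 0x7fffffffu, 0xffffffffu};
  const uint32_t cs[] = {0u, 1u, 0x12345678u, 0xffffffffu};
  const uint32_t carries[] = {0u, 1u};
  for (uint32_t x : xs) {
    for (uint32_t c : cs) {
      assert(static_cast<uint32_t>(x + c) == static_cast<uint32_t>(x - (0u - c)));
      for (uint32_t carry : carries) {
        uint32_t adc = x + c + carry;          // ADC
        uint32_t sbc = x - ~c - (1u - carry);  // SBC with complemented immediate
        assert(adc == sbc);
      }
    }
  }
  return 0;
}
```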
 
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -4251,6 +4348,122 @@
   codegen_->GenerateNullCheck(instruction);
 }
 
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kLoadWord;
+    case Primitive::kPrimBoolean:
+      return kLoadUnsignedByte;
+    case Primitive::kPrimByte:
+      return kLoadSignedByte;
+    case Primitive::kPrimChar:
+      return kLoadUnsignedHalfword;
+    case Primitive::kPrimShort:
+      return kLoadSignedHalfword;
+    case Primitive::kPrimInt:
+      return kLoadWord;
+    case Primitive::kPrimLong:
+      return kLoadWordPair;
+    case Primitive::kPrimFloat:
+      return kLoadSWord;
+    case Primitive::kPrimDouble:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kStoreWord;
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return kStoreByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return kStoreHalfword;
+    case Primitive::kPrimInt:
+      return kStoreWord;
+    case Primitive::kPrimLong:
+      return kStoreWordPair;
+    case Primitive::kPrimFloat:
+      return kStoreSWord;
+    case Primitive::kPrimDouble:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
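One detail worth spelling out in the load mapping above: kPrimChar gets an unsigned halfword load while kPrimShort gets a signed one. The stand-alone snippet below (plain C++, not ART code) shows the zero- versus sign-extension difference that mapping preserves when widening to 32 bits.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint16_t raw = 0xFFFF;                         // same 16 bits in memory
  int32_t as_char = static_cast<uint16_t>(raw);  // Java char: zero-extend -> 65535
  int32_t as_short = static_cast<int16_t>(raw);  // Java short: sign-extend -> -1
  assert(as_char == 65535);
  assert(as_short == -1);
  return 0;
}
```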
+
+void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type,
+                                                Location out_loc,
+                                                Register base,
+                                                Register reg_offset,
+                                                Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+      __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimBoolean:
+      __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+      __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimChar:
+      __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ ldr(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type,
+                                               Location loc,
+                                               Register base,
+                                               Register reg_offset,
+                                               Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+      __ strb(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      __ strh(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ str(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
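The helpers above change how the element address is formed: the old code added the scaled index to the array reference and used the payload offset as the load's immediate, while the new code adds the payload offset first and uses a shifted register offset. A small stand-alone sketch of that equivalence follows; the payload offset value is illustrative, not the real array header size.

```cpp
#include <assert.h>
#include <stdint.h>

int main() {
  uintptr_t obj = 0x1000;      // hypothetical array reference
  uint32_t data_offset = 12;   // illustrative payload offset
  uint32_t index = 5;
  uint32_t shift = 2;          // ComponentSizeShift for 4-byte elements
  // old: add IP, obj, index LSL #shift ; ldr out, [IP, #data_offset]
  uintptr_t old_form = (obj + (static_cast<uintptr_t>(index) << shift)) + data_offset;
  // new: add IP, obj, #data_offset     ; ldr out, [IP, index, LSL #shift]
  uintptr_t new_form = (obj + data_offset) + (static_cast<uintptr_t>(index) << shift);
  assert(old_form == new_form);
  return 0;
}
```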
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
   bool object_array_get_with_read_barrier =
       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
@@ -4284,75 +4497,41 @@
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
-
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   Primitive::Type type = instruction->GetType();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
   switch (type) {
-    case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFromOffset(kLoadWord, out, obj, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+        LoadOperandType load_type = GetLoadOperandType(type);
+        __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = obj;
+        } else {
+          __ add(temp, obj, ShifterOperand(data_offset));
+        }
+        codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
       }
       break;
     }
@@ -4361,7 +4540,6 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -4382,8 +4560,22 @@
           // reference, if heap poisoning is enabled).
           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
         } else {
-          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+          Register temp = IP;
+
+          if (has_intermediate_address) {
+            // We do not need to compute the intermediate address from the array: the
+            // input instruction has done it already. See the comment in
+            // `TryExtractArrayAccessAddress()`.
+            if (kIsDebugBuild) {
+              HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            }
+            temp = obj;
+          } else {
+            __ add(temp, obj, ShifterOperand(data_offset));
+          }
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           // If read barriers are enabled, emit read barriers other than
           // Baker's using a slow path (and also unpoison the loaded
@@ -4396,7 +4588,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -4409,7 +4600,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       SRegister out = out_loc.AsFpuRegister<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4422,7 +4612,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -4485,54 +4674,68 @@
   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t data_offset =
+      mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  Location value_loc = locations->InAt(2);
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, array, offset);
-      } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
-        __ StoreToOffset(kStoreByte, value, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimByte:
     case Primitive::kPrimShort:
-    case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, array, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset =
+            data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+        StoreOperandType store_type = GetStoreOperandType(value_type);
+        __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
       } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = array;
+        } else {
+          __ add(temp, array, ShifterOperand(data_offset));
+        }
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          value_loc,
+                                          temp,
+                                          index.AsRegister<Register>());
       }
       break;
     }
 
     case Primitive::kPrimNot: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Location value_loc = locations->InAt(2);
       Register value = value_loc.AsRegister<Register>();
-      Register source = value;
+      // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+      // See the comment in instruction_simplifier_shared.cc.
+      DCHECK(!has_intermediate_address);
 
       if (instruction->InputAt(2)->IsNullConstant()) {
         // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, source, array, offset);
+          __ StoreToOffset(kStoreWord, value, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ StoreToOffset(kStoreWord, source, IP, data_offset);
+          __ add(IP, array, ShifterOperand(data_offset));
+          codegen_->StoreToShiftedRegOffset(value_type,
+                                            value_loc,
+                                            IP,
+                                            index.AsRegister<Register>());
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
@@ -4561,8 +4764,11 @@
             __ StoreToOffset(kStoreWord, value, array, offset);
           } else {
             DCHECK(index.IsRegister()) << index;
-            __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-            __ StoreToOffset(kStoreWord, value, IP, data_offset);
+            __ add(IP, array, ShifterOperand(data_offset));
+            codegen_->StoreToShiftedRegOffset(value_type,
+                                              value_loc,
+                                              IP,
+                                              index.AsRegister<Register>());
           }
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           __ b(&done);
@@ -4629,6 +4835,7 @@
         }
       }
 
+      Register source = value;
       if (kPoisonHeapReferences) {
         // Note that in the case where `value` is a null reference,
         // we do not enter this block, as a null reference does not
@@ -4645,8 +4852,12 @@
         __ StoreToOffset(kStoreWord, source, array, offset);
       } else {
         DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, source, IP, data_offset);
+
+        __ add(IP, array, ShifterOperand(data_offset));
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          Location::RegisterLocation(source),
+                                          IP,
+                                          index.AsRegister<Register>());
       }
 
       if (!may_need_runtime_call_for_type_check) {
@@ -4666,23 +4877,7 @@
       break;
     }
 
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreToOffset(kStoreWord, value, array, offset);
-      } else {
-        DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, value, IP, data_offset);
-      }
-      break;
-    }
-
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Location value = locations->InAt(2);
       if (index.IsConstant()) {
         size_t offset =
@@ -4696,7 +4891,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
@@ -4710,7 +4904,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
@@ -4744,13 +4937,44 @@
 
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+
+  if (second.IsRegister()) {
+    __ add(out.AsRegister<Register>(),
+           first.AsRegister<Register>(),
+           ShifterOperand(second.AsRegister<Register>()));
+  } else {
+    __ AddConstant(out.AsRegister<Register>(),
+                   first.AsRegister<Register>(),
+                   second.GetConstant()->AsIntConstant()->GetValue());
+  }
+}
+
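VisitIntermediateAddress above only emits the add; the payoff described in `TryExtractArrayAccessAddress()` is that the `array + data_offset` value can then feed several accesses. The sketch below is a conceptual C++ illustration of that reuse; the zero payload offset is an assumption that keeps the demo valid C++, not the real array layout.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t backing[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  uintptr_t array = reinterpret_cast<uintptr_t>(backing);
  uintptr_t data_offset = 0;                     // assumed 0 for this demo
  uintptr_t intermediate = array + data_offset;  // computed once (HIntermediateAddress)
  // each access then only needs the shifted index:
  uint32_t a = *reinterpret_cast<uint32_t*>(intermediate + (2u << 2));
  uint32_t b = *reinterpret_cast<uint32_t*>(intermediate + (5u << 2));
  assert(a == 2 && b == 5);
  return 0;
}
```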
 void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -4785,7 +5009,7 @@
   if (can_be_null) {
     __ CompareAndBranchIfZero(value, &is_null);
   }
-  __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value());
+  __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   __ strb(card, Address(card, temp));
   if (can_be_null) {
@@ -4836,7 +5060,7 @@
   }
 
   __ LoadFromOffset(
-      kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
+      kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
   if (successor == nullptr) {
     __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5075,13 +5299,71 @@
   __ Pop(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(R0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -5098,37 +5380,97 @@
 
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ LoadFromOffset(kLoadWord,
-                      out,
-                      current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                                    cls->GetTypeIndex()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // A 16-bit LDR immediate has a 5-bit offset scaled by the access size, which gives
+      // a 128B range. To reduce the number of literals when we load multiple types, simply
+      // split the dex cache address into a 128B-aligned base loaded from a literal and the
+      // remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
+      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
+      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
+      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
+      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        current_method,
+                        ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+    }
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
-      }
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
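The kDexCacheAddress case in the function above splits the GC-root address into a 128-byte-aligned literal base plus an offset small enough for the 16-bit LDR immediate. A stand-alone check of that arithmetic, using a made-up address:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  constexpr uint32_t kOffsetBits = 5 + 2;   // 5 encoded bits, scaled by 4
  uint32_t address = 0x7123ABCC;            // hypothetical 4-byte-aligned GC root address
  uint32_t base_address = address & ~((1u << kOffsetBits) - 1u);
  uint32_t offset = address & ((1u << kOffsetBits) - 1u);
  assert(base_address % 128u == 0u);        // literal base is 128B aligned
  assert(offset < 128u);                    // fits the 16-bit LDR immediate range
  assert(base_address + offset == address);
  return 0;
}
```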
@@ -5247,57 +5589,19 @@
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       return;  // No dex cache slow path.
     }
-    case HLoadString::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(load->GetAddress(), 0u);
-      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
-      // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
-      // a 128B range. To try and reduce the number of literals if we load multiple strings,
-      // simply split the dex cache address to a 128B aligned base loaded from a literal
-      // and the remaining offset embedded in the load.
-      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
-      DCHECK_ALIGNED(load->GetAddress(), 4u);
-      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
-      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
-      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
-      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
-      GenerateGcRootFieldLoad(load, out_loc, out, offset);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCachePcRelative: {
-      Register base_reg = locations->InAt(0).AsRegister<Register>();
-      HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
-      int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
-      GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCacheViaMethod: {
-      Register current_method = locations->InAt(0).AsRegister<Register>();
-
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      GenerateGcRootFieldLoad(
-          load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-      __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-      // /* GcRoot<mirror::String> */ out = out[string_index]
-      GenerateGcRootFieldLoad(
-          load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-      break;
-    }
     default:
-      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
-      UNREACHABLE();
+      break;
   }
 
-  if (!load->IsInDexCache()) {
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
-    codegen_->AddSlowPath(slow_path);
-    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
-  }
+  // TODO: Re-add the compiler code to do string dex cache lookup again.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+  codegen_->AddSlowPath(slow_path);
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kArmWordSize>().Int32Value();
+  return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
 }
 
 void LocationsBuilderARM::VisitLoadException(HLoadException* load) {
@@ -5322,7 +5626,7 @@
 
 void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5723,7 +6027,7 @@
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5886,6 +6190,34 @@
   __ eor(out, first, ShifterOperand(value));
 }
 
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+                                                       Location first,
+                                                       uint64_t value) {
+  Register out_low = out.AsRegisterPairLow<Register>();
+  Register out_high = out.AsRegisterPairHigh<Register>();
+  Register first_low = first.AsRegisterPairLow<Register>();
+  Register first_high = first.AsRegisterPairHigh<Register>();
+  uint32_t value_low = Low32Bits(value);
+  uint32_t value_high = High32Bits(value);
+  if (value_low == 0u) {
+    if (out_low != first_low) {
+      __ mov(out_low, ShifterOperand(first_low));
+    }
+    __ AddConstant(out_high, first_high, value_high);
+    return;
+  }
+  __ AddConstantSetFlags(out_low, first_low, value_low);
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+    __ adc(out_high, first_high, so);
+  } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+    __ sbc(out_high, first_high, so);
+  } else {
+    LOG(FATAL) << "Unexpected constant " << value_high;
+    UNREACHABLE();
+  }
+}
+
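The high-word handling in GenerateAddLongConst leans on an ARM arithmetic identity: SBC with the bitwise complement of an operand gives the same 32-bit result as ADC with the operand itself, so whichever immediate the shifter operand can encode is usable. A minimal runnable check of that identity, modelling the ADC/SBC semantics in plain C++ (not ART code):

    #include <cassert>
    #include <cstdint>

    // 32-bit models of the ARM ADC and SBC instructions (c is the carry flag, 0 or 1).
    static uint32_t Adc(uint32_t rn, uint32_t op2, uint32_t c) { return rn + op2 + c; }
    static uint32_t Sbc(uint32_t rn, uint32_t op2, uint32_t c) { return rn - op2 - (1u - c); }

    int main() {
      const uint32_t rn = 0x12345678u;
      const uint32_t value_high = 0xABCDEF01u;
      for (uint32_t c = 0; c <= 1; ++c) {
        // SBC with ~value_high equals ADC with value_high modulo 2^32, which is why
        // either encoding may be picked for the high word above.
        assert(Adc(rn, value_high, c) == Sbc(rn, ~value_high, c));
      }
      return 0;
    }
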
 void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location first = locations->InAt(0);
@@ -6045,14 +6377,14 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
+      // Slow path marking the GC root `root`.
       SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
-          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
+          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
@@ -6083,8 +6415,9 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check);
+      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
 }
 
 void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6097,10 +6430,14 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check);
+      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
 
 void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6108,6 +6445,7 @@
                                                                  Register obj,
                                                                  uint32_t offset,
                                                                  Location index,
+                                                                 ScaleFactor scale_factor,
                                                                  Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
@@ -6144,35 +6482,32 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
-  __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including the rb_state,
+  // which shall prevent load-load reordering without using
   // a memory barrier (which would be more expensive).
-  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // IP, which depends on temp_reg.
-  __ add(obj, obj, ShifterOperand(IP));
+  // `obj` is unchanged by this operation, but its value now depends
+  // on `temp_reg`.
+  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    // Load types involving an "index": ArrayGet and
+    // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
-          (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
       __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
     } else {
-      __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
+      // a register pair as index ("long offset"), of which only the low
+      // part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor));
       __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
     }
   } else {
@@ -6183,15 +6518,21 @@
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
+  // Slow path marking the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS
+  // which can be a 16-bit instruction unlike the TST immediate.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
 }
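
Two details of GenerateReferenceLoadWithBakerReadBarrier above are easy to miss. The ADD with `temp_reg, LSR #32` adds zero (ARM defines an immediate LSR by 32 as producing 0) purely to make `obj` data-dependent on the lock word load, and the LSRS by `kReadBarrierStateShift + 1` drops the low bit of the read barrier state into the carry flag so one conditional branch tests for gray. A small C++ model of both effects; the shift value is illustrative, not the actual LockWord layout:

    #include <cassert>
    #include <cstdint>

    // Illustrative shift; the real value comes from art::LockWord.
    constexpr uint32_t kReadBarrierStateShift = 28;

    int main() {
      const uint32_t lock_word = (1u << kReadBarrierStateShift) | 0x000ABCDEu;  // rb_state == gray (1)

      // LSRS temp, temp, #(kReadBarrierStateShift + 1): the carry flag receives the last
      // bit shifted out, i.e. bit kReadBarrierStateShift of the lock word -- the low bit
      // of the read barrier state. Gray (1) sets it, white (0) and black (2) do not.
      const uint32_t carry = (lock_word >> kReadBarrierStateShift) & 1u;
      assert(carry == 1u);  // gray -> branch to the marking slow path

      // ADD obj, obj, temp_reg LSR #32: an immediate LSR by 32 of a 32-bit register
      // yields 0, so obj is unchanged but now depends on the lock word load
      // (modelled with a 64-bit shift to avoid C++ undefined behaviour).
      const uint64_t lsr32 = static_cast<uint64_t>(lock_word) >> 32;
      assert(lsr32 == 0u);
      return 0;
    }
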
 
@@ -6403,7 +6744,7 @@
       // LR = callee_method->entry_point_from_quick_compiled_code_
       __ LoadFromOffset(
           kLoadWord, LR, callee_method.AsRegister<Register>(),
-          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value());
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
       // LR()
       __ blx(LR);
       break;
@@ -6437,7 +6778,7 @@
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
+      kArmPointerSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -6450,6 +6791,11 @@
   return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
 }
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+}
+
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -6468,6 +6814,13 @@
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
+Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                           uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
 Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
   bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
   Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
@@ -6487,6 +6840,8 @@
       /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() +
       boot_image_string_patches_.size() +
       /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
@@ -6562,6 +6917,35 @@
                                                                add_pc_offset,
                                                                string_index));
   }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    uint32_t type_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+  }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     Literal* literal = entry.second;
@@ -6781,7 +7165,7 @@
                       method_offset);
   } else {
     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+        instruction->GetIndex(), kArmPointerSize));
     __ LoadFromOffset(kLoadWord,
                       locations->Out().AsRegister<Register>(),
                       locations->InAt(0).AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0020f7b..5d9b2dc 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -17,13 +17,14 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 
+#include "base/enums.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
+#include "string_reference.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
-#include "utils/string_reference.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace arm {
@@ -31,7 +32,7 @@
 class CodeGeneratorARM;
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kArmWordSize = kArmPointerSize;
+static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
 
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
@@ -179,9 +180,10 @@
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArithmeticZeroOrFpuRegister(HInstruction* input);
   Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
   bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
-  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -218,6 +220,7 @@
   void GenerateAndConst(Register out, Register first, uint32_t value);
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
+  void GenerateAddLongConst(Location out, Location first, uint64_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
@@ -280,6 +283,7 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
+  void GenerateVcmp(HInstruction* instruction);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -364,6 +368,24 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  void LoadOrStoreToOffset(Primitive::Type type,
+                           Location loc,
+                           Register base,
+                           int32_t offset,
+                           bool is_load,
+                           Condition cond = AL);
+
+  void LoadFromShiftedRegOffset(Primitive::Type type,
+                                Location out_loc,
+                                Register base,
+                                Register reg_offset,
+                                Condition cond = AL);
+  void StoreToShiftedRegOffset(Primitive::Type type,
+                               Location out_loc,
+                               Register base,
+                               Register reg_offset,
+                               Condition cond = AL);
+
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
                      HInstruction* instruction,
@@ -375,6 +397,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
@@ -407,6 +435,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -419,10 +452,10 @@
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
-  // and boot image strings. The only difference is the interpretation of the offset_or_index.
-  // The PC-relative address is loaded with three instructions, MOVW+MOVT
-  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
-  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index. The PC-relative address is loaded with three instructions,
+  // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset
+  // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
   // currently emit these 3 instructions together, instruction scheduling could
   // split this sequence apart, so we keep separate labels for each of them.
   struct PcRelativePatchInfo {
@@ -431,7 +464,7 @@
     PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
     Label movw_label;
     Label movt_label;
@@ -439,9 +472,11 @@
   };
 
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
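
For the MOVW/MOVT/ADD sequence documented in PcRelativePatchInfo above, the value eventually patched into the MOVW/MOVT pair is the distance from the ADD's effective PC (the ADD's code offset plus 4 on Thumb2) to the target, split into half-words. A sketch of that arithmetic with made-up addresses; this is an assumed layout for illustration, not the linker's actual code:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Made-up addresses purely for illustration.
      const uint32_t target_address = 0x7100ABCDu;   // where the string/type entry ends up
      const uint32_t add_pc_position = 0x70004010u;  // code offset of the "ADD base_reg, PC"

      // Thumb2 reads PC as the ADD's address + 4, so that is the anchor of the offset.
      const uint32_t offset = target_address - (add_pc_position + 4u);
      const uint16_t movw_imm = static_cast<uint16_t>(offset & 0xFFFFu);  // patched into the MOVW
      const uint16_t movt_imm = static_cast<uint16_t>(offset >> 16);      // patched into the MOVT

      std::printf("offset=0x%08x movw=0x%04x movt=0x%04x\n", offset, movw_imm, movt_imm);
      return 0;
    }
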
 
@@ -464,6 +499,16 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -519,16 +564,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
@@ -536,6 +571,9 @@
   using BootStringToLiteralMap = ArenaSafeMap<StringReference,
                                               Literal*,
                                               StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
 
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
@@ -568,6 +606,10 @@
   BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 8e58b15..cc8985d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -33,8 +33,7 @@
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
 
-
-using namespace vixl;   // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 #ifdef __
 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
@@ -132,8 +131,9 @@
   return ARM64ReturnLocation(return_type);
 }
 
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
 
 // Calculate memory accessing operand for save/restore live registers.
 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
@@ -146,20 +146,20 @@
                                          codegen->GetNumberOfFloatingPointRegisters()));
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
-      register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+      register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList()));
   CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
-      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList()));
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
 
   Register base = masm->StackPointer();
-  int64_t core_spill_size = core_list.TotalSizeInBytes();
-  int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
   int64_t reg_size = kXRegSizeInBytes;
   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
   uint32_t ls_access_size = WhichPowerOf2(reg_size);
-  if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
     // If the offset does not fit in the instruction's immediate field, use an alternate register
     // to compute the base address(float point registers spill base address).
@@ -236,8 +236,11 @@
     codegen->EmitParallelMoves(
         locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
         locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
-    arm64_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -395,11 +398,9 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -407,7 +408,7 @@
     }
   }
 
-  vixl::Label* GetReturnLabel() {
+  vixl::aarch64::Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -423,7 +424,7 @@
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  vixl::Label return_label_;
+  vixl::aarch64::Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
 };
@@ -459,7 +460,7 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
                            const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -563,9 +564,9 @@
   __ Bind(&table_start_);
   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
   for (uint32_t i = 0; i < num_entries; i++) {
-    vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
     DCHECK(target_label->IsBound());
-    ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
     Literal<int32_t> literal(jump_offset);
@@ -576,8 +577,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCodeARM64(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
+      : SlowPathCodeARM64(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -585,38 +586,54 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                                 instruction_,
-                                 instruction_->GetDexPc(),
-                                 this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(obj_.reg(), LR);
+    DCHECK_NE(obj_.reg(), WSP);
+    DCHECK_NE(obj_.reg(), WZR);
+    // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, it cannot be the entry point's input/output.
+    DCHECK_NE(obj_.reg(), IP0);
+    DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in W0):
+    //
+    //   W0 <- obj
+    //   W0 <- ReadBarrierMark(W0)
+    //   obj <- W0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+    // This runtime call does not require a stack map.
+    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ B(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
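
The register-specific ReadBarrierMarkRegX entry points used by this slow path avoid the two moves of the standard calling convention. A sketch of how the offset returned by GetReadBarrierMarkEntryPointsOffset is presumably formed, assuming one contiguous slot per register in the Thread entry point table; the base offset and bound below are purely illustrative:

    #include <cassert>
    #include <cstdint>

    // Purely illustrative values; the real base comes from the quick entry point table.
    constexpr int32_t kReadBarrierMarkReg00Offset = 0x300;  // hypothetical offset of pReadBarrierMarkReg00
    constexpr int32_t kArm64PointerSize = 8;

    int32_t ReadBarrierMarkEntryPointOffset(int reg) {
      assert(0 <= reg && reg < 30);  // bound is illustrative; see the DCHECKs in the slow path above
      return kReadBarrierMarkReg00Offset + kArm64PointerSize * reg;
    }

    int main() {
      // A reference held in W5 would be marked in place through the slot for register 5.
      return ReadBarrierMarkEntryPointOffset(5) == kReadBarrierMarkReg00Offset + 5 * kArm64PointerSize ? 0 : 1;
    }
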
@@ -657,15 +674,17 @@
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()))
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
     DCHECK(!(instruction_->IsArrayGet() &&
-             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
@@ -676,7 +695,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
@@ -724,16 +743,17 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ Add(index_reg, index_reg, Operand(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
             << instruction_->AsInvoke()->GetIntrinsic();
         DCHECK_EQ(offset_, 0U);
-        DCHECK(index_.IsRegisterPair());
-        // UnsafeGet's offset location is a register pair, the low
-        // part contains the correct offset.
-        index = index_.ToLow();
+        DCHECK(index_.IsRegister());
       }
     }
 
@@ -776,8 +796,8 @@
 
  private:
   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
-    size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
-    size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
+    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
+    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
         return Register(VIXLRegCodeFromART(i), kXRegSize);
@@ -895,8 +915,8 @@
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
-                    callee_saved_core_registers.list(),
-                    callee_saved_fp_registers.list(),
+                    callee_saved_core_registers.GetList(),
+                    callee_saved_fp_registers.GetList(),
                     compiler_options,
                     stats),
       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -919,6 +939,9 @@
       boot_image_string_patches_(StringReferenceValueComparator(),
                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
@@ -1043,17 +1066,17 @@
   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
-  return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
-                          core_spill_mask_);
+  return CPURegList(CPURegister::kRegister, kXRegSize,
+                    core_spill_mask_);
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
                                          GetNumberOfFloatingPointRegisters()));
-  return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
-                          fpu_spill_mask_);
+  return CPURegList(CPURegister::kFPRegister, kDRegSize,
+                    fpu_spill_mask_);
 }
 
 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -1077,11 +1100,11 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
-  vixl::Label done;
+  vixl::aarch64::Label done;
   if (value_can_be_null) {
     __ Cbz(value, &done);
   }
-  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
+  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
   __ Strb(card, MemOperand(card, temp.X()));
   if (value_can_be_null) {
@@ -1102,12 +1125,12 @@
   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   reserved_core_registers.Combine(runtime_reserved_core_registers);
   while (!reserved_core_registers.IsEmpty()) {
-    blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
+    blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
   }
 
   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
   while (!reserved_fp_registers.IsEmpty()) {
-    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
+    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
   }
 
   if (GetGraph()->IsDebuggable()) {
@@ -1116,7 +1139,7 @@
     // now, just block them.
     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
     while (!reserved_fp_registers_debuggable.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
     }
   }
 }
@@ -1260,17 +1283,21 @@
       UseScratchRegisterScope temps(GetVIXLAssembler());
       HConstant* src_cst = source.GetConstant();
       CPURegister temp;
-      if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
-        temp = temps.AcquireW();
-      } else if (src_cst->IsLongConstant()) {
-        temp = temps.AcquireX();
-      } else if (src_cst->IsFloatConstant()) {
-        temp = temps.AcquireS();
+      if (src_cst->IsZeroBitPattern()) {
+        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
       } else {
-        DCHECK(src_cst->IsDoubleConstant());
-        temp = temps.AcquireD();
+        if (src_cst->IsIntConstant()) {
+          temp = temps.AcquireW();
+        } else if (src_cst->IsLongConstant()) {
+          temp = temps.AcquireX();
+        } else if (src_cst->IsFloatConstant()) {
+          temp = temps.AcquireS();
+        } else {
+          DCHECK(src_cst->IsDoubleConstant());
+          temp = temps.AcquireD();
+        }
+        MoveConstant(temp, src_cst);
       }
-      MoveConstant(temp, src_cst);
       __ Str(temp, StackOperandFrom(destination));
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
@@ -1327,7 +1354,7 @@
   DCHECK(!src.IsPostIndex());
 
   // TODO(vixl): Let the MacroAssembler handle MemOperand.
-  __ Add(temp_base, src.base(), OperandFromMemOperand(src));
+  __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1419,7 +1446,7 @@
 
   // TODO(vixl): Let the MacroAssembler handle this.
   Operand op = OperandFromMemOperand(dst);
-  __ Add(temp_base, dst.base(), op);
+  __ Add(temp_base, dst.GetBaseRegister(), op);
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1455,7 +1482,7 @@
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kArm64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1472,8 +1499,17 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                             HInstruction* instruction,
+                                                             SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  BlockPoolsScope block_pools(GetVIXLAssembler());
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
+  __ Blr(lr);
+}
+
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
-                                                                     vixl::Register class_reg) {
+                                                                     Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
@@ -1529,7 +1565,7 @@
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
 
-  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
+  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
   if (successor == nullptr) {
     __ Cbnz(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1738,7 +1774,7 @@
         __ Sub(dst, lhs, rhs);
       } else if (instr->IsRor()) {
         if (rhs.IsImmediate()) {
-          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+          uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
           __ Ror(dst, lhs, shift);
         } else {
           // Ensure shift distance is in the same size register as the result. If
@@ -1801,7 +1837,7 @@
       Register lhs = InputRegisterAt(instr, 0);
       Operand rhs = InputOperandAt(instr, 1);
       if (rhs.IsImmediate()) {
-        uint32_t shift_value = rhs.immediate() &
+        uint32_t shift_value = rhs.GetImmediate() &
             (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, shift_value);
@@ -1811,7 +1847,7 @@
           __ Lsr(dst, lhs, shift_value);
         }
       } else {
-        Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+        Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
 
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, rhs_reg);
@@ -1948,9 +1984,8 @@
   }
 }
 
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the
-  // HArm64IntermediateAddress instruction yet.
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
   DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1959,10 +1994,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
-    HArm64IntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the
-  // HArm64IntermediateAddress instruction yet.
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
+    HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
   DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
@@ -1997,13 +2031,14 @@
   if (instr->GetType() == Primitive::kPrimLong &&
       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
-    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+    vixl::aarch64::Instruction* prev =
+        masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
     if (prev->IsLoadOrStore()) {
       // Make sure we emit only exactly one nop.
-      vixl::CodeBufferCheckScope scope(masm,
-                                       vixl::kInstructionSize,
-                                       vixl::CodeBufferCheckScope::kCheck,
-                                       vixl::CodeBufferCheckScope::kExactSize);
+      vixl::aarch64::CodeBufferCheckScope scope(masm,
+                                                kInstructionSize,
+                                                vixl::aarch64::CodeBufferCheckScope::kCheck,
+                                                vixl::aarch64::CodeBufferCheckScope::kExactSize);
       __ nop();
     }
   }
@@ -2050,8 +2085,8 @@
   Register obj = InputRegisterAt(instruction, 0);
   LocationSummary* locations = instruction->GetLocations();
   Location index = locations->InAt(1);
-  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   Location out = locations->Out();
+  uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -2061,9 +2096,8 @@
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2076,15 +2110,15 @@
       source = HeapOperand(obj, offset);
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
-      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      if (instruction->GetArray()->IsIntermediateAddress()) {
         // The read barrier instrumentation does not support the
-        // HArm64IntermediateAddress instruction yet.
+        // HIntermediateAddress instruction yet.
         DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
-        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        // `TryExtractArrayAccessAddress()`.
         if (kIsDebugBuild) {
-          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
         }
         temp = obj;
@@ -2118,9 +2152,9 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction),
-         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
@@ -2168,15 +2202,15 @@
     } else {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      if (instruction->GetArray()->IsIntermediateAddress()) {
         // The read barrier instrumentation does not support the
-        // HArm64IntermediateAddress instruction yet.
+        // HIntermediateAddress instruction yet.
         DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
-        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        // `TryExtractArrayAccessAddress()`.
         if (kIsDebugBuild) {
-          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
           DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
         }
         temp = array;
@@ -2192,8 +2226,8 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
     DCHECK(needs_write_barrier);
-    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
-    vixl::Label done;
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
+    vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
     {
       // We use a block to end the scratch scope before the write barrier, thus
@@ -2218,7 +2252,7 @@
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
-          vixl::Label non_zero;
+          vixl::aarch64::Label non_zero;
           __ Cbnz(Register(value), &non_zero);
           if (!index.IsConstant()) {
             __ Add(temp, array, offset);
@@ -2272,7 +2306,7 @@
           __ Cmp(temp, temp2);
 
           if (instruction->StaticTypeOfArrayIsObjectArray()) {
-            vixl::Label do_put;
+            vixl::aarch64::Label do_put;
             __ B(eq, &do_put);
             // If heap poisoning is enabled, the `temp` reference has
             // not been unpoisoned yet; unpoison it now.
@@ -2805,11 +2839,11 @@
 
 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
                                                           size_t condition_input_index,
-                                                          vixl::Label* true_target,
-                                                          vixl::Label* false_target) {
+                                                          vixl::aarch64::Label* true_target,
+                                                          vixl::aarch64::Label* false_target) {
   // FP branching requires both targets to be explicit. If either of the targets
   // is nullptr (fallthrough) use and bind `fallthrough_target` instead.
-  vixl::Label fallthrough_target;
+  vixl::aarch64::Label fallthrough_target;
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -2867,7 +2901,7 @@
       Operand rhs = InputOperandAt(condition, 1);
 
       Condition arm64_cond;
-      vixl::Label* non_fallthrough_target;
+      vixl::aarch64::Label* non_fallthrough_target;
       if (true_target == nullptr) {
         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
         non_fallthrough_target = false_target;
@@ -2877,7 +2911,7 @@
       }
 
       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
-          rhs.IsImmediate() && (rhs.immediate() == 0)) {
+          rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -2926,10 +2960,14 @@
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
-  vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
-      nullptr : codegen_->GetLabelOf(true_successor);
-  vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
-      nullptr : codegen_->GetLabelOf(false_successor);
+  vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
+    true_target = nullptr;
+  }
+  vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
+    false_target = nullptr;
+  }
   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
@@ -2950,75 +2988,48 @@
                         /* false_target */ nullptr);
 }
 
-enum SelectVariant {
-  kCsel,
-  kCselFalseConst,
-  kCselTrueConst,
-  kFcsel,
-};
-
 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
   return condition->IsCondition() &&
          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
 }
 
-static inline bool IsRecognizedCselConstant(HInstruction* constant) {
-  if (constant->IsConstant()) {
-    int64_t value = Int64FromConstant(constant->AsConstant());
-    if ((value == -1) || (value == 0) || (value == 1)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-static inline SelectVariant GetSelectVariant(HSelect* select) {
-  if (Primitive::IsFloatingPointType(select->GetType())) {
-    return kFcsel;
-  } else if (IsRecognizedCselConstant(select->GetFalseValue())) {
-    return kCselFalseConst;
-  } else if (IsRecognizedCselConstant(select->GetTrueValue())) {
-    return kCselTrueConst;
-  } else {
-    return kCsel;
-  }
-}
-
-static inline bool HasSwappedInputs(SelectVariant variant) {
-  return variant == kCselTrueConst;
-}
-
-static inline Condition GetConditionForSelect(HCondition* condition, SelectVariant variant) {
-  IfCondition cond = HasSwappedInputs(variant) ? condition->GetOppositeCondition()
-                                               : condition->GetCondition();
+static inline Condition GetConditionForSelect(HCondition* condition) {
+  IfCondition cond = condition->AsCondition()->GetCondition();
   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
                                                      : ARM64Condition(cond);
 }
 
 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  switch (GetSelectVariant(select)) {
-    case kCsel:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
-      break;
-    case kCselFalseConst:
-      locations->SetInAt(0, Location::ConstantLocation(select->InputAt(0)->AsConstant()));
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
-      break;
-    case kCselTrueConst:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::ConstantLocation(select->InputAt(1)->AsConstant()));
-      locations->SetOut(Location::RequiresRegister());
-      break;
-    case kFcsel:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister());
-      break;
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresFpuRegister());
+  } else {
+    HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
+    HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+    bool is_true_value_constant = cst_true_value != nullptr;
+    bool is_false_value_constant = cst_false_value != nullptr;
+    // Ask VIXL whether we should synthesize constants in registers.
+    // We give an arbitrary register to VIXL when dealing with non-constant inputs.
+    Operand true_op = is_true_value_constant ?
+        Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
+    Operand false_op = is_false_value_constant ?
+        Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
+    bool true_value_in_register = false;
+    bool false_value_in_register = false;
+    MacroAssembler::GetCselSynthesisInformation(
+        x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
+    true_value_in_register |= !is_true_value_constant;
+    false_value_in_register |= !is_false_value_constant;
+
+    locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
+                                                 : Location::ConstantLocation(cst_true_value));
+    locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
+                                                  : Location::ConstantLocation(cst_false_value));
+    locations->SetOut(Location::RequiresRegister());
   }
+
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
     locations->SetInAt(2, Location::RequiresRegister());
   }
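The locations code above hands VIXL, via `MacroAssembler::GetCselSynthesisInformation()`, the question that the deleted `IsRecognizedCselConstant()` helper used to answer by hand: which constants a conditional select can produce without first materializing them in a register. A standalone restatement of that old check, kept only to illustrate the idea (VIXL's answer is more general and also considers the other operand):

    #include <cstdint>
    // 0 can come from CSEL with the zero register, 1 from CSINC and -1 from
    // CSINV, so those constants need no scratch register; anything else does,
    // which is what the RequiresRegister()/ConstantLocation() split encodes.
    bool CselNeedsRegisterForConstant(int64_t value) {
      return value != 0 && value != 1 && value != -1;
    }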
@@ -3026,45 +3037,34 @@
 
 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
   HInstruction* cond = select->GetCondition();
-  SelectVariant variant = GetSelectVariant(select);
   Condition csel_cond;
 
   if (IsBooleanValueOrMaterializedCondition(cond)) {
     if (cond->IsCondition() && cond->GetNext() == select) {
-      // Condition codes set from previous instruction.
-      csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+      // Use the condition flags set by the previous instruction.
+      csel_cond = GetConditionForSelect(cond->AsCondition());
     } else {
       __ Cmp(InputRegisterAt(select, 2), 0);
-      csel_cond = HasSwappedInputs(variant) ? eq : ne;
+      csel_cond = ne;
     }
   } else if (IsConditionOnFloatingPointValues(cond)) {
     GenerateFcmp(cond);
-    csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+    csel_cond = GetConditionForSelect(cond->AsCondition());
   } else {
     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
-    csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+    csel_cond = GetConditionForSelect(cond->AsCondition());
   }
 
-  switch (variant) {
-    case kCsel:
-    case kCselFalseConst:
-      __ Csel(OutputRegister(select),
-              InputRegisterAt(select, 1),
-              InputOperandAt(select, 0),
-              csel_cond);
-      break;
-    case kCselTrueConst:
-      __ Csel(OutputRegister(select),
-              InputRegisterAt(select, 0),
-              InputOperandAt(select, 1),
-              csel_cond);
-      break;
-    case kFcsel:
-      __ Fcsel(OutputFPRegister(select),
-               InputFPRegisterAt(select, 1),
-               InputFPRegisterAt(select, 0),
-               csel_cond);
-      break;
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    __ Fcsel(OutputFPRegister(select),
+             InputFPRegisterAt(select, 1),
+             InputFPRegisterAt(select, 0),
+             csel_cond);
+  } else {
+    __ Csel(OutputRegister(select),
+            InputOperandAt(select, 1),
+            InputOperandAt(select, 0),
+            csel_cond);
   }
 }
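With the variants collapsed, the emission reduces to one conditional select, and the operand order explains the locations chosen earlier: HSelect input 1 is the true value and input 0 the false value, so `Csel`/`Fcsel` pick operand 1 when `csel_cond` holds and operand 0 otherwise. In plain C++ terms (a restatement, not the generated code):

    #include <cstdint>
    // out = csel_cond ? true_value (input 1) : false_value (input 0)
    int64_t SelectResult(bool csel_cond, int64_t true_value, int64_t false_value) {
      return csel_cond ? true_value : false_value;
    }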
 
@@ -3151,7 +3151,7 @@
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  vixl::Label done, zero;
+  vixl::aarch64::Label done, zero;
   SlowPathCodeARM64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
@@ -3176,7 +3176,7 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
@@ -3193,7 +3193,7 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
@@ -3212,7 +3212,7 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::Label exact_check;
+      vixl::aarch64::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
@@ -3349,7 +3349,7 @@
                                                           is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  vixl::Label done;
+  vixl::aarch64::Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ Cbz(obj, &done);
@@ -3371,7 +3371,7 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop, compare_classes;
+      vixl::aarch64::Label loop, compare_classes;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
@@ -3398,7 +3398,7 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
@@ -3423,7 +3423,7 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::Label check_non_primitive_component_type;
+      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
@@ -3529,7 +3529,7 @@
   Register temp = XRegisterFrom(locations->GetTemp(0));
   Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
 
   // The register ip1 is required to be used for the hidden argument in
   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
@@ -3559,7 +3559,7 @@
   __ Ldr(temp,
       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
+      invoke->GetImtIndex(), kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -3649,17 +3649,17 @@
       // Add ADRP with its PC-relative DexCache access patch.
       const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
       uint32_t element_offset = invoke->GetDexCacheArrayOffset();
-      vixl::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
       }
       // Add LDR with its PC-relative DexCache access patch.
-      vixl::Label* ldr_label =
+      vixl::aarch64::Label* ldr_label =
           NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(ldr_label);
         __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
       }
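The ADRP/LDR pair above is the standard AArch64 PC-relative addressing idiom: ADRP materializes the 4 KiB page address of a target within +/-4 GiB of the PC, and the LDR immediate supplies the remaining low 12 bits. The zero immediates bound to `adrp_label` and `ldr_label` are placeholders; the recorded labels let the linker patch in the real page delta and page offset. A throwaway illustration of how a patched address splits between the two instructions (the address is made up):

    #include <cstdint>
    constexpr uint64_t kTarget       = UINT64_C(0x0000007fb0042a10);  // hypothetical dex-cache slot
    constexpr uint64_t kPageOfTarget = kTarget & ~UINT64_C(0xFFF);    // what ADRP leaves in the register
    constexpr uint64_t kLo12         = kTarget &  UINT64_C(0xFFF);    // what the LDR offset adds back
    static_assert(kPageOfTarget + kLo12 == kTarget, "the split reconstructs the address");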
@@ -3681,7 +3681,7 @@
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ Ldr(reg.X(),
              MemOperand(method_reg.X(),
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
       // temp = temp[index_in_cache];
       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
       uint32_t index_in_cache = invoke->GetDexMethodIndex();
@@ -3696,8 +3696,8 @@
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
-      vixl::Label* label = &relative_call_patches_.back().label;
-      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      vixl::aarch64::Label* label = &relative_call_patches_.back().label;
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
       __ Bind(label);
       __ bl(0);  // Branch and link to itself. This will be overridden at link time.
       break;
@@ -3713,7 +3713,7 @@
       // LR = callee_method->entry_point_from_quick_compiled_code_;
       __ Ldr(lr, MemOperand(
           XRegisterFrom(callee_method),
-          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
       // lr()
       __ Blr(lr);
       break;
@@ -3733,7 +3733,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
   Offset class_offset = mirror::Object::ClassOffset();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
 
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
@@ -3756,45 +3756,64 @@
   __ Blr(lr);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                          uint32_t string_index,
-                                                          vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+    const DexFile& dex_file,
+    uint32_t string_index,
+    vixl::aarch64::Label* adrp_label) {
   return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                                 uint32_t element_offset,
-                                                                 vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
+    const DexFile& dex_file,
+    uint32_t type_index,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file,
+    uint32_t element_offset,
+    vixl::aarch64::Label* adrp_label) {
   return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativePatch(const DexFile& dex_file,
-                                                    uint32_t offset_or_index,
-                                                    vixl::Label* adrp_label,
-                                                    ArenaDeque<PcRelativePatchInfo>* patches) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
+    const DexFile& dex_file,
+    uint32_t offset_or_index,
+    vixl::aarch64::Label* adrp_label,
+    ArenaDeque<PcRelativePatchInfo>* patches) {
   // Add a patch entry and return the label.
   patches->emplace_back(dex_file, offset_or_index);
   PcRelativePatchInfo* info = &patches->back();
-  vixl::Label* label = &info->label;
+  vixl::aarch64::Label* label = &info->label;
   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
   return label;
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
     const DexFile& dex_file, uint32_t string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
+    const DexFile& dex_file, uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
+    uint64_t address) {
   bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
   Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(
+    uint64_t address) {
   return DeduplicateUint64Literal(address);
 }
 
@@ -3807,67 +3826,82 @@
       pc_relative_dex_cache_patches_.size() +
       boot_image_string_patches_.size() +
       pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      pc_relative_type_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal->GetOffset(),
                                                        target_method.dex_file,
                                                        target_method.dex_method_index));
   }
   for (const auto& entry : call_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::CodePatch(literal->GetOffset(),
                                                      target_method.dex_file,
                                                      target_method.dex_method_index));
   }
-  for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
+  for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(),
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
                                                               &info.target_dex_file,
-                                                              info.pc_insn_label->location(),
+                                                              info.pc_insn_label->GetLocation(),
                                                               info.offset_or_index));
   }
   for (const auto& entry : boot_image_string_patches_) {
     const StringReference& target_string = entry.first;
-    vixl::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::StringPatch(literal->offset(),
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
                                                        target_string.dex_file,
                                                        target_string.string_index));
   }
   for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.location(),
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(),
                                                                &info.target_dex_file,
-                                                               info.pc_insn_label->location(),
+                                                               info.pc_insn_label->GetLocation(),
                                                                info.offset_or_index));
   }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(),
+                                                             &info.target_dex_file,
+                                                             info.pc_insn_label->GetLocation(),
+                                                             info.offset_or_index));
+  }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    vixl::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal->offset()));
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
   }
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
                                                                       Uint32ToLiteralMap* map) {
   return map->GetOrCreate(
       value,
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
   return uint64_literals_.GetOrCreate(
       value,
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
     MethodReference target_method,
     MethodToLiteralMap* map) {
   return map->GetOrCreate(
@@ -3875,12 +3909,12 @@
       [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &method_patches_);
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
@@ -3912,13 +3946,63 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        LocationFrom(vixl::aarch64::x0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3934,41 +4018,119 @@
 
   Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
-  Register current_method = InputRegisterAt(cls, 0);
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ Cbz(out, slow_path->GetEntryLabel());
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = InputRegisterAt(cls, 0);
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                            cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      // Add ADRP with its PC-relative type patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t type_index = cls->GetTypeIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
       }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
+      // Add ADD with its PC-relative type patch.
+      vixl::aarch64::Label* add_label =
+          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(add_label);
+        __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
       }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
+      // that gives a 16KiB range. To try and reduce the number of literals if we load
+      // multiple types, simply split the dex cache address to a 16KiB aligned base
+      // loaded from a literal and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
+      uint64_t base_address = cls->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
+      uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits);
+      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      // Add ADRP with its PC-relative DexCache access patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t element_offset = cls->GetDexCacheElementOffset();
+      vixl::aarch64::Label* adrp_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      {
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add LDR with its PC-relative DexCache access patch.
+      vixl::aarch64::Label* ldr_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      MemberOffset resolved_types_offset =
+          ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = InputRegisterAt(cls, 0);
+      __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Cbz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
 
 static MemOperand GetExceptionTlsAddress() {
-  return MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value());
+  return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
 }
 
 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
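The `kDexCacheAddress` case earlier in this hunk relies on the LDR (immediate) encoding: a 12-bit unsigned offset scaled by the access size, so a 32-bit load reaches roughly 16 KiB beyond its base register. Splitting each dex-cache address into a 16 KiB-aligned base, kept in a deduplicated literal, plus a small offset embedded in the load lets several class loads share one literal. A standalone check of the arithmetic, with a made-up address:

    #include <cstdint>
    // 12 encoded offset bits + 2 bits of scale for a 4-byte GC root.
    constexpr uint64_t kOffsetBits = 12 + 2;
    constexpr uint64_t kOffsetMask = (UINT64_C(1) << kOffsetBits) - 1;  // 0x3FFF, i.e. 16 KiB - 1
    constexpr uint64_t kAddress    = UINT64_C(0x0000007f12345678);      // hypothetical, 4-byte aligned
    constexpr uint64_t kBase       = kAddress & ~kOffsetMask;           // loaded from the literal pool
    constexpr uint64_t kOffset     = kAddress &  kOffsetMask;           // 0x1678, folded into the LDR
    static_assert(kBase + kOffset == kAddress, "the split loses no bits");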
@@ -4035,7 +4197,6 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
-  Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
 
   switch (load->GetLoadKind()) {
@@ -4049,17 +4210,17 @@
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
       uint32_t string_index = load->GetStringIndex();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(out.X(), /* offset placeholder */ 0);
       }
       // Add ADD with its PC-relative String patch.
-      vixl::Label* add_label =
+      vixl::aarch64::Label* add_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(add_label);
         __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
       }
@@ -4071,60 +4232,15 @@
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
       return;  // No dex cache slow path.
     }
-    case HLoadString::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(load->GetAddress(), 0u);
-      // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
-      // that gives a 16KiB range. To try and reduce the number of literals if we load
-      // multiple strings, simply split the dex cache address to a 16KiB aligned base
-      // loaded from a literal and the remaining offset embedded in the load.
-      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
-      DCHECK_ALIGNED(load->GetAddress(), 4u);
-      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
-      uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
-      uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
-      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
-      GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCachePcRelative: {
-      // Add ADRP with its PC-relative DexCache access patch.
-      const DexFile& dex_file = load->GetDexFile();
-      uint32_t element_offset = load->GetDexCacheElementOffset();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
-      {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
-        __ Bind(adrp_label);
-        __ adrp(out.X(), /* offset placeholder */ 0);
-      }
-      // Add LDR with its PC-relative DexCache access patch.
-      vixl::Label* ldr_label =
-          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
-      GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCacheViaMethod: {
-      Register current_method = InputRegisterAt(load, 0);
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      GenerateGcRootFieldLoad(
-          load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-      __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-      // /* GcRoot<mirror::String> */ out = out[string_index]
-      GenerateGcRootFieldLoad(
-          load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-      break;
-    }
     default:
-      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
-      UNREACHABLE();
+      break;
   }
 
-  if (!load->IsInDexCache()) {
-    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
-    codegen_->AddSlowPath(slow_path);
-    __ Cbz(out, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
-  }
+  // TODO: Re-add the compiler code that does the string dex cache lookup.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+  codegen_->AddSlowPath(slow_path);
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -4138,7 +4254,7 @@
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -4236,7 +4352,7 @@
 
 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetOut(LocationFrom(x0));
@@ -4261,7 +4377,7 @@
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(LocationFrom(kArtMethodRegister));
@@ -4278,7 +4394,7 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Location temp = instruction->GetLocations()->GetTemp(0);
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
     __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
     __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
     __ Blr(lr);
@@ -4317,7 +4433,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
-  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
+  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
@@ -4401,7 +4517,7 @@
 
 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -4414,7 +4530,8 @@
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4631,7 +4748,7 @@
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -4749,7 +4866,7 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Roughly set 16 as max average assemblies generated per HIR in a graph.
-  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
   // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to
   // make sure we don't emit it if the target may run out of range.
   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
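The size heuristic above is easy to sanity-check: AArch64 instructions are 4 bytes, so 16 of them per HIR is 64 bytes, and ADR's +/-1 MiB reach then corresponds to roughly 16 K HIR instructions before a jump table emitted at the end of the method could fall out of range. The real threshold constant is defined outside this hunk; the numbers below only restate the figures quoted above:

    #include <cstdint>
    constexpr int32_t kInstructionSize = 4;                                     // AArch64 fixed-width encoding
    constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;  // 64 bytes, as above
    constexpr int32_t kAdrRange = 1 * 1024 * 1024;                              // ADR reaches +/-1 MiB
    constexpr int32_t kRoughHInstructionLimit =
        kAdrRange / kMaxExpectedSizePerHInstruction;                            // 16384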
@@ -4894,9 +5011,9 @@
 
 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                             Location root,
-                                                            vixl::Register obj,
+                                                            Register obj,
                                                             uint32_t offset,
-                                                            vixl::Label* fixup_label) {
+                                                            vixl::aarch64::Label* fixup_label) {
   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
@@ -4912,7 +5029,7 @@
       if (fixup_label == nullptr) {
         __ Ldr(root_reg, MemOperand(obj, offset));
       } else {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(fixup_label);
         __ ldr(root_reg, MemOperand(obj, offset));
       }
@@ -4924,16 +5041,16 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
+      // Slow path marking the GC root `root`.
       SlowPathCodeARM64* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       MacroAssembler* masm = GetVIXLAssembler();
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireW();
       // temp = Thread::Current()->GetIsGcMarking()
-      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
       __ Cbnz(temp, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
@@ -4943,7 +5060,7 @@
       if (fixup_label == nullptr) {
         __ Add(root_reg.X(), obj.X(), offset);
       } else {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(fixup_label);
         __ add(root_reg.X(), obj.X(), offset);
       }
@@ -4956,7 +5073,7 @@
     if (fixup_label == nullptr) {
       __ Ldr(root_reg, MemOperand(obj, offset));
     } else {
-      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
       __ Bind(fixup_label);
       __ ldr(root_reg, MemOperand(obj, offset));
     }
@@ -4967,7 +5084,7 @@
 
 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
-                                                               vixl::Register obj,
+                                                               Register obj,
                                                                uint32_t offset,
                                                                Register temp,
                                                                bool needs_null_check,
@@ -4977,13 +5094,21 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+  size_t no_scale_factor = 0U;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
-                                                               vixl::Register obj,
+                                                               Register obj,
                                                                uint32_t data_offset,
                                                                Location index,
                                                                Register temp,
@@ -4995,26 +5120,38 @@
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                    Location ref,
-                                                                   vixl::Register obj,
+                                                                   Register obj,
                                                                    uint32_t offset,
                                                                    Location index,
+                                                                   size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
                                                                    bool use_load_acquire) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
-  // If `index` is a valid location, then we are emitting an array
-  // load, so we shouldn't be using a Load Acquire instruction.
-  // In other words: `index.IsValid()` => `!use_load_acquire`.
-  DCHECK(!index.IsValid() || !use_load_acquire);
+  // If we are emitting an array load, we should not be using a
+  // Load Acquire instruction.  In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
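For reference arrays the scale factor passed above is `Primitive::ComponentSizeShift(Primitive::kPrimNot)`, i.e. 2, since (per the static_assert) a heap reference is the size of an int32_t; the element address is therefore `obj + data_offset + (index << 2)`. A throwaway numeric check of that addressing, with a hypothetical data offset:

    #include <cstdint>
    constexpr uint32_t kScaleFactor = 2;   // log2(sizeof(HeapReference<Object>)) = log2(4)
    constexpr uint32_t kDataOffset  = 12;  // hypothetical offset of element 0 within the array object
    constexpr uint32_t ElementOffset(uint32_t index) { return kDataOffset + (index << kScaleFactor); }
    static_assert(ElementOffset(3) == 24, "element 3 sits 12 bytes past element 0");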
@@ -5051,40 +5188,43 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
-  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word (which includes the rb_state)
+  // to prevent load-load reordering, without using
+  // a memory barrier (which would be more expensive).
-  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  Register temp2 = temps.AcquireW();
-  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // temp2, which depends on temp.
-  __ Add(obj, obj, Operand(temp2));
-  temps.Release(temp2);
+  // `obj` is unchanged by this operation, but its value now depends
+  // on `temp`.
+  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
-    const size_t shift_amount = Primitive::ComponentSizeShift(type);
-    if (index.IsConstant()) {
-      uint32_t computed_offset = offset + (Int64ConstantFrom(index) << shift_amount);
-      Load(type, ref_reg, HeapOperand(obj, computed_offset));
+    // Load types involving an "index".
+    if (use_load_acquire) {
+      // UnsafeGetObjectVolatile intrinsic case.
+      // Register `index` is not an index in an object array, but an
+      // offset to an object reference field within object `obj`.
+      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
+      DCHECK(instruction->GetLocations()->Intrinsified());
+      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
+          << instruction->AsInvoke()->GetIntrinsic();
+      DCHECK_EQ(offset, 0U);
+      DCHECK_EQ(scale_factor, 0U);
+      DCHECK_EQ(needs_null_check, 0U);
+      // /* HeapReference<Object> */ ref = *(obj + index)
+      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
     } else {
-      temp2 = temps.AcquireW();
-      __ Add(temp2, obj, offset);
-      Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, shift_amount));
-      temps.Release(temp2);
+      // ArrayGet and UnsafeGetObject intrinsics cases.
+      // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      if (index.IsConstant()) {
+        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+        Load(type, ref_reg, HeapOperand(obj, computed_offset));
+      } else {
+        Register temp2 = temps.AcquireW();
+        __ Add(temp2, obj, offset);
+        Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
+        temps.Release(temp2);
+      }
     }
   } else {
     // /* HeapReference<Object> */ ref = *(obj + offset)
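The single `Add` earlier in this hunk replaces the old Lsr/And/Bic sequence with a cheaper way to create the same artificial address dependency: assuming the lock word was fetched with a 32-bit load, bits 63:32 of `temp.X()` are zero, so `temp.X() LSR 32` adds nothing to `obj`, yet the subsequent reference load now address-depends on the lock-word load and cannot be reordered before it, with no memory barrier needed. A sketch of the resulting shape in the same macro-assembler style (names are placeholders, not the emitted code verbatim):

    __ Ldr(temp, HeapOperand(obj, monitor_offset));        // temp (W) <- 32-bit lock word; upper X bits are zero
    __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));  // adds 0, but obj now depends on the lock word
    __ Ldr(ref_reg, HeapOperand(obj, offset));             // ordered after the lock-word load via that dependency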
@@ -5099,15 +5239,18 @@
   // Object* ref = ref_addr->AsMirrorPtr()
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
+  // Slow path marking the object `ref` when it is gray.
   SlowPathCodeARM64* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ Cmp(temp, ReadBarrier::gray_ptr_);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
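The `Tbnz` above works because of the state encoding spelled out in the static_asserts: white = 0, gray = 1, black = 2, so only gray has the least-significant state bit set, and testing the single bit at `LockWord::kReadBarrierStateShift` is equivalent to the old `Cmp`/`B(eq)` against `gray_ptr_`. A standalone restatement of that equivalence:

    #include <cstdint>
    constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;  // values asserted above
    constexpr bool LowStateBitSet(uint32_t state) { return (state & 1u) != 0; }
    static_assert(!LowStateBitSet(kWhite) && LowStateBitSet(kGray) && !LowStateBitSet(kBlack),
                  "only gray has the low read-barrier state bit set");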
 
@@ -5189,7 +5332,7 @@
            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   } else {
     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+        instruction->GetIndex(), kArm64PointerSize));
     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
     __ Ldr(XRegisterFrom(locations->Out()),
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 422963e..921ce10 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -20,14 +20,19 @@
 #include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/string_reference.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+#include "utils/type_reference.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
@@ -35,34 +40,49 @@
 class CodeGeneratorARM64;
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kArm64WordSize = kArm64PointerSize;
+static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
 
-static const vixl::Register kParameterCoreRegisters[] = {
-  vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
+static const vixl::aarch64::Register kParameterCoreRegisters[] = {
+  vixl::aarch64::x1,
+  vixl::aarch64::x2,
+  vixl::aarch64::x3,
+  vixl::aarch64::x4,
+  vixl::aarch64::x5,
+  vixl::aarch64::x6,
+  vixl::aarch64::x7
 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-static const vixl::FPRegister kParameterFPRegisters[] = {
-  vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7
+static const vixl::aarch64::FPRegister kParameterFPRegisters[] = {
+  vixl::aarch64::d0,
+  vixl::aarch64::d1,
+  vixl::aarch64::d2,
+  vixl::aarch64::d3,
+  vixl::aarch64::d4,
+  vixl::aarch64::d5,
+  vixl::aarch64::d6,
+  vixl::aarch64::d7
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-const vixl::Register tr = vixl::x19;                        // Thread Register
-static const vixl::Register kArtMethodRegister = vixl::x0;  // Method register on invoke.
+// Thread Register
+const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Method register on invoke.
+static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
+const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
+                                                             vixl::aarch64::ip1);
+const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
 
-const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
-const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
 
 // Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
-                                                   vixl::kXRegSize,
-                                                   vixl::x20.code(),
-                                                   vixl::x30.code());
-const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
-                                                 vixl::kDRegSize,
-                                                 vixl::d8.code(),
-                                                 vixl::d15.code());
+const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
+                                                            vixl::aarch64::kXRegSize,
+                                                            vixl::aarch64::x20.GetCode(),
+                                                            vixl::aarch64::x30.GetCode());
+const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
+                                                          vixl::aarch64::kDRegSize,
+                                                          vixl::aarch64::d8.GetCode(),
+                                                          vixl::aarch64::d15.GetCode());
 Location ARM64ReturnLocation(Primitive::Type return_type);
 
 class SlowPathCodeARM64 : public SlowPathCode {
@@ -70,15 +90,15 @@
   explicit SlowPathCodeARM64(HInstruction* instruction)
       : SlowPathCode(instruction), entry_label_(), exit_label_() {}
 
-  vixl::Label* GetEntryLabel() { return &entry_label_; }
-  vixl::Label* GetExitLabel() { return &exit_label_; }
+  vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
+  vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
 
   void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
   void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
 
  private:
-  vixl::Label entry_label_;
-  vixl::Label exit_label_;
+  vixl::aarch64::Label entry_label_;
+  vixl::aarch64::Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
@@ -88,27 +108,42 @@
   explicit JumpTableARM64(HPackedSwitch* switch_instr)
     : switch_instr_(switch_instr), table_start_() {}
 
-  vixl::Label* GetTableStartLabel() { return &table_start_; }
+  vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
 
   void EmitTable(CodeGeneratorARM64* codegen);
 
  private:
   HPackedSwitch* const switch_instr_;
-  vixl::Label table_start_;
+  vixl::aarch64::Label table_start_;
 
   DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
 };
 
-static const vixl::Register kRuntimeParameterCoreRegisters[] =
-    { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
+    { vixl::aarch64::x0,
+      vixl::aarch64::x1,
+      vixl::aarch64::x2,
+      vixl::aarch64::x3,
+      vixl::aarch64::x4,
+      vixl::aarch64::x5,
+      vixl::aarch64::x6,
+      vixl::aarch64::x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::FPRegister kRuntimeParameterFpuRegisters[] =
-    { vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7 };
+static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] =
+    { vixl::aarch64::d0,
+      vixl::aarch64::d1,
+      vixl::aarch64::d2,
+      vixl::aarch64::d3,
+      vixl::aarch64::d4,
+      vixl::aarch64::d5,
+      vixl::aarch64::d6,
+      vixl::aarch64::d7 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 
-class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                                vixl::aarch64::FPRegister> {
  public:
   static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 
@@ -125,7 +160,8 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
 };
 
-class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                            vixl::aarch64::FPRegister> {
  public:
   InvokeDexCallingConvention()
       : CallingConvention(kParameterCoreRegisters,
@@ -165,23 +201,23 @@
   FieldAccessCallingConventionARM64() {}
 
   Location GetObjectLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x1);
+    return helpers::LocationFrom(vixl::aarch64::x1);
   }
   Location GetFieldIndexLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
     return Primitive::Is64BitType(type)
-        ? helpers::LocationFrom(vixl::x2)
+        ? helpers::LocationFrom(vixl::aarch64::x2)
         : (is_instance
-            ? helpers::LocationFrom(vixl::x2)
-            : helpers::LocationFrom(vixl::x1));
+            ? helpers::LocationFrom(vixl::aarch64::x2)
+            : helpers::LocationFrom(vixl::aarch64::x1));
   }
   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::d0);
+    return helpers::LocationFrom(vixl::aarch64::d0);
   }
 
  private:
@@ -207,10 +243,11 @@
   }
 
   Arm64Assembler* GetAssembler() const { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
-  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
+                                        vixl::aarch64::Register class_reg);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
 
@@ -255,9 +292,9 @@
   // while honoring read barriers (if any).
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
-                               vixl::Register obj,
+                               vixl::aarch64::Register obj,
                                uint32_t offset,
-                               vixl::Label* fixup_label = nullptr);
+                               vixl::aarch64::Label* fixup_label = nullptr);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -265,8 +302,8 @@
   void HandleShift(HBinaryOperation* instr);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
-                             vixl::Label* true_target,
-                             vixl::Label* false_target);
+                             vixl::aarch64::Label* true_target,
+                             vixl::aarch64::Label* false_target);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -326,12 +363,12 @@
 
  private:
   Arm64Assembler* GetAssembler() const;
-  vixl::MacroAssembler* GetVIXLAssembler() const {
-    return GetAssembler()->vixl_masm_;
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
+    return GetAssembler()->GetVIXLAssembler();
   }
 
   CodeGeneratorARM64* const codegen_;
-  vixl::UseScratchRegisterScope vixl_temps_;
+  vixl::aarch64::UseScratchRegisterScope vixl_temps_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
 };
@@ -347,12 +384,12 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
-  vixl::CPURegList GetFramePreservedCoreRegisters() const;
-  vixl::CPURegList GetFramePreservedFPRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  vixl::Label* GetLabelOf(HBasicBlock* block) {
+  vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
     block = FirstNonEmptyBlock(block);
     return &(block_labels_[block->GetBlockId()]);
   }
@@ -367,19 +404,21 @@
   }
 
   uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
-    vixl::Label* block_entry_label = GetLabelOf(block);
+    vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
-    return block_entry_label->location();
+    return block_entry_label->GetLocation();
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
   // Emit a write barrier.
-  void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
+  void MarkGCCard(vixl::aarch64::Register object,
+                  vixl::aarch64::Register value,
+                  bool value_can_be_null);
 
   void GenerateMemoryBarrier(MemBarrierKind kind);
 
@@ -398,8 +437,8 @@
   // (xzr, wzr), or make for poor allocatable registers (sp alignment
   // requirements, etc.). This also facilitates our task as all other registers
   // can easily be mapped to or from their type and index or code.
-  static const int kNumberOfAllocatableRegisters = vixl::kNumberOfRegisters - 1;
-  static const int kNumberOfAllocatableFPRegisters = vixl::kNumberOfFPRegisters;
+  static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
+  static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
   static constexpr int kNumberOfAllocatableRegisterPairs = 0;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -417,6 +456,10 @@
     block_labels_.resize(GetGraph()->GetBlocks().size());
   }
 
+  // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
+  // These instructions can only encode offsets that are multiples of the register size accessed.
+  uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; }
+
   JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
     jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
     return jump_tables_.back().get();
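The comment above gives the rationale for returning kXRegSizeInBytes: STP/LDP of X registers scale their immediate by the access size, so slow-path spill slots must sit at offsets that are multiples of 8. A small sketch of that rounding, assuming slot offsets are byte offsets from SP (hypothetical helper, not ART code):

    #include <cstdint>

    constexpr uint32_t kXRegSizeInBytes = 8;  // access size of an X register, i.e. the preferred alignment

    // Round a byte offset up so that an STP/LDP of X registers can encode it.
    constexpr uint32_t AlignSlotOffset(uint32_t offset) {
      return (offset + kXRegSizeInBytes - 1) & ~(kXRegSizeInBytes - 1);
    }

    static_assert(AlignSlotOffset(20) == 24, "20 rounds up to the next multiple of 8");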
@@ -425,18 +468,24 @@
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
-  void MoveConstant(vixl::CPURegister destination, HConstant* constant);
+  void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
-  void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void Load(Primitive::Type type,
+            vixl::aarch64::CPURegister dst,
+            const vixl::aarch64::MemOperand& src);
+  void Store(Primitive::Type type,
+             vixl::aarch64::CPURegister src,
+             const vixl::aarch64::MemOperand& dst);
   void LoadAcquire(HInstruction* instruction,
-                   vixl::CPURegister dst,
-                   const vixl::MemOperand& src,
+                   vixl::aarch64::CPURegister dst,
+                   const vixl::aarch64::MemOperand& src,
                    bool needs_null_check);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void StoreRelease(Primitive::Type type,
+                    vixl::aarch64::CPURegister src,
+                    const vixl::aarch64::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -449,6 +498,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -460,6 +515,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -478,22 +538,33 @@
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
-  vixl::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                        uint32_t string_index,
-                                        vixl::Label* adrp_label = nullptr);
+  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                 uint32_t string_index,
+                                                 vixl::aarch64::Label* adrp_label = nullptr);
+
+  // Add a new PC-relative type patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
+                                               uint32_t type_index,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative dex cache array patch for an instruction and return
   // the label to be bound before the instruction. The instruction will be
   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
   // pointing to the associated ADRP patch label).
-  vixl::Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                               uint32_t element_offset,
-                                               vixl::Label* adrp_label = nullptr);
+  vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
+      const DexFile& dex_file,
+      uint32_t element_offset,
+      vixl::aarch64::Label* adrp_label = nullptr);
 
-  vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                             uint32_t string_index);
-  vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
-  vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                                      uint32_t string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                                    uint32_t type_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
@@ -501,20 +572,31 @@
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                              Location ref,
-                                             vixl::Register obj,
+                                             vixl::aarch64::Register obj,
                                              uint32_t offset,
-                                             vixl::Register temp,
+                                             vixl::aarch64::Register temp,
                                              bool needs_null_check,
                                              bool use_load_acquire);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                              Location ref,
-                                             vixl::Register obj,
+                                             vixl::aarch64::Register obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             vixl::Register temp,
+                                             vixl::aarch64::Register temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::aarch64::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 size_t scale_factor,
+                                                 vixl::aarch64::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -570,57 +652,51 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 vixl::Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 vixl::Register temp,
-                                                 bool needs_null_check,
-                                                 bool use_load_acquire);
-
-  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
-  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
+  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
-                                          vixl::Literal<uint64_t>*,
+                                          vixl::aarch64::Literal<uint64_t>*,
                                           MethodReferenceComparator>;
   using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              vixl::Literal<uint32_t>*,
+                                              vixl::aarch64::Literal<uint32_t>*,
                                               StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            vixl::aarch64::Literal<uint32_t>*,
+                                            TypeReferenceValueComparator>;
 
-  vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
-  vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-  vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
-                                                    MethodToLiteralMap* map);
-  vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
-  vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
+                                                             Uint32ToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
+                                                             MethodToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
-  // and boot image strings. The only difference is the interpretation of the offset_or_index.
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index.
   struct PcRelativePatchInfo {
     PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
         : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { }
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
-    vixl::Label label;
-    vixl::Label* pc_insn_label;
+    vixl::aarch64::Label label;
+    vixl::aarch64::Label* pc_insn_label;
   };
 
-  vixl::Label* NewPcRelativePatch(const DexFile& dex_file,
-                                  uint32_t offset_or_index,
-                                  vixl::Label* adrp_label,
-                                  ArenaDeque<PcRelativePatchInfo>* patches);
+  vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
+                                           uint32_t offset_or_index,
+                                           vixl::aarch64::Label* adrp_label,
+                                           ArenaDeque<PcRelativePatchInfo>* patches);
 
   void EmitJumpTables();
 
   // Labels for each block that will be compiled.
-  // We use a deque so that the `vixl::Label` objects do not move in memory.
-  ArenaDeque<vixl::Label> block_labels_;  // Indexed by block id.
-  vixl::Label frame_entry_label_;
+  // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
+  ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
+  vixl::aarch64::Label frame_entry_label_;
   ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
@@ -639,13 +715,17 @@
   MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
-  ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
+  ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
   BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 06248a3..a7fbc84 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -39,6 +39,10 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
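To make the kDexCacheArrayLwOffset comment concrete: a MIPS lw takes a signed 16-bit immediate, reaching [base - 0x8000, base + 0x7FFF], so pointing the base register 0x8000 bytes past the first used element gives loads a full 64KB window that starts exactly at that element. A worked sketch (hypothetical helper, illustration only):

    #include <cstdint>

    constexpr int64_t kMinLwImm = -0x8000;  // smallest signed 16-bit immediate
    constexpr int64_t kMaxLwImm = 0x7FFF;   // largest signed 16-bit immediate
    constexpr int64_t kDexCacheArrayLwOffset = 0x8000;

    // The base register is anchored 0x8000 bytes past the first used element.
    constexpr int64_t BaseFor(int64_t first_used_element_offset) {
      return first_used_element_offset + kDexCacheArrayLwOffset;
    }

    static_assert(BaseFor(0x40) + kMinLwImm == 0x40, "imm -0x8000 reaches the first used element");
    static_assert(BaseFor(0x40) + kMaxLwImm == 0x40 + 0xFFFF, "imm 0x7FFF reaches 64KB - 1 past it");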
@@ -141,8 +145,9 @@
   return MipsReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
@@ -165,11 +170,15 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips_codegen->InvokeRuntime(entry_point_offset,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickThrowArrayBounds));
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -342,14 +351,12 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickTestSuspend));
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -406,7 +413,7 @@
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -472,14 +479,31 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena(), &isa_features),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      clobbered_ra_(false) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
 
 #undef __
-#define __ down_cast<MipsAssembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
 
 void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -663,6 +687,28 @@
   }
 }
 
+void CodeGeneratorMIPS::ComputeSpillMask() {
+  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // If there are FPU callee-saved registers and there's an odd number of GPR callee-saved
+  // registers, include the ZERO register to force alignment of FPU callee-saved registers
+  // within the stack frame.
+  if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
+  // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
+  // (this can happen in leaf methods), artificially spill the ZERO register in order to
+  // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
+  // spilled register.
+  // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
+  // saved in an unused temporary register) and saving of RA and the current method pointer
+  // in the frame.
+  if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
+}
+
 static dwarf::Reg DWARFReg(Register reg) {
   return dwarf::Reg::MipsCore(static_cast<int>(reg));
 }
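A short sketch of the two padding rules in ComputeSpillMask above: an odd number of 4-byte GPR slots above the 8-byte FPU slots would misalign them, and a lone clobbered RA would otherwise not get an explicit save/restore, so in both cases the ZERO register is added as a dummy slot (illustrative C++; the register encodings are assumptions):

    #include <cstdint>

    // Assumed encodings for illustration: ZERO = 0, RA = 31.
    constexpr uint32_t kZeroBit = 1u << 0;
    constexpr uint32_t kRaBit = 1u << 31;

    inline uint32_t PadSpillMask(uint32_t core_mask, uint32_t fpu_mask, bool clobbered_ra) {
      if (fpu_mask != 0u && (__builtin_popcount(core_mask) % 2) != 0) {
        core_mask |= kZeroBit;  // keep the 8-byte FPU save area aligned below an odd GPR count
      }
      if (clobbered_ra && core_mask == kRaBit && fpu_mask == 0u) {
        core_mask |= kZeroBit;  // force an explicit save/restore of RA in leaf methods
      }
      return core_mask;
    }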
@@ -692,105 +738,61 @@
   }
 
   // Spill callee-saved registers.
-  // Note that their cumulative size is small and they can be indexed using
-  // 16-bit offsets.
 
-  // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-  uint32_t ofs = FrameEntrySpillSize();
-  bool unaligned_float = ofs & 0x7;
-  bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
+  uint32_t ofs = GetFrameSize();
   __ IncreaseFrameSize(ofs);
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMipsWordSize;
-      __ Sw(reg, SP, ofs);
+  for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+    Register reg = static_cast<Register>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsWordSize;
+    // The ZERO register is only included for alignment.
+    if (reg != ZERO) {
+      __ StoreToOffset(kStoreWord, reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
-  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
-    FRegister reg = kFpuCalleeSaves[i];
-    if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMipsDoublewordSize;
-      // TODO: Change the frame to avoid unaligned accesses for fpu registers.
-      if (unaligned_float) {
-        if (fpu_32bit) {
-          __ Swc1(reg, SP, ofs);
-          __ Swc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-        } else {
-          __ Mfhc1(TMP, reg);
-          __ Swc1(reg, SP, ofs);
-          __ Sw(TMP, SP, ofs + 4);
-        }
-      } else {
-        __ Sdc1(reg, SP, ofs);
-      }
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
-    }
+  for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+    FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsDoublewordSize;
+    __ StoreDToOffset(reg, SP, ofs);
+    // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
   }
 
-  // Allocate the rest of the frame and store the current method pointer
-  // at its end.
-
-  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
-  static_assert(IsInt<16>(kCurrentMethodStackOffset),
-                "kCurrentMethodStackOffset must fit into int16_t");
-  __ Sw(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  // Store the current method pointer.
+  __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
 }
 
 void CodeGeneratorMIPS::GenerateFrameExit() {
   __ cfi().RememberState();
 
   if (!HasEmptyFrame()) {
-    // Deallocate the rest of the frame.
-
-    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
     // Restore callee-saved registers.
-    // Note that their cumulative size is small and they can be indexed using
-    // 16-bit offsets.
 
-    // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-    uint32_t ofs = 0;
-    bool unaligned_float = FrameEntrySpillSize() & 0x7;
-    bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      FRegister reg = kFpuCalleeSaves[i];
-      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-        if (unaligned_float) {
-          if (fpu_32bit) {
-            __ Lwc1(reg, SP, ofs);
-            __ Lwc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-          } else {
-            __ Lwc1(reg, SP, ofs);
-            __ Lw(TMP, SP, ofs + 4);
-            __ Mthc1(TMP, reg);
-          }
-        } else {
-          __ Ldc1(reg, SP, ofs);
-        }
-        ofs += kMipsDoublewordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
-      }
-    }
-
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      Register reg = kCoreCalleeSaves[i];
-      if (allocated_registers_.ContainsCoreRegister(reg)) {
-        __ Lw(reg, SP, ofs);
-        ofs += kMipsWordSize;
+    // For better instruction scheduling, restore RA before other registers.
+    uint32_t ofs = GetFrameSize();
+    for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+      Register reg = static_cast<Register>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsWordSize;
+      // The ZERO register is only included for alignment.
+      if (reg != ZERO) {
+        __ LoadFromOffset(kLoadWord, reg, SP, ofs);
         __ cfi().Restore(DWARFReg(reg));
       }
     }
 
-    DCHECK_EQ(ofs, FrameEntrySpillSize());
-    __ DecreaseFrameSize(ofs);
+    for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+      FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsDoublewordSize;
+      __ LoadDFromOffset(reg, SP, ofs);
+      // TODO: __ cfi().Restore(DWARFReg(reg));
+    }
+
+    __ DecreaseFrameSize(GetFrameSize());
   }
 
   __ Jr(RA);
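The spill and restore loops above walk the mask from its most significant set bit downward while the offset is decremented, so higher-numbered registers land at higher stack offsets. A minimal sketch of that traversal (plain C++; MostSignificantBit is assumed to behave like ART's bit_utils helper):

    #include <cstdint>

    inline int MostSignificantBit(uint32_t mask) {
      return mask == 0u ? -1 : 31 - __builtin_clz(mask);
    }

    // Visit each set bit from the most significant to the least significant one,
    // matching the order in which the frame-entry code assigns decreasing offsets.
    template <typename Visitor>
    void ForEachSetBitMsbFirst(uint32_t mask, Visitor visit) {
      while (mask != 0u) {
        int bit = MostSignificantBit(mask);
        mask ^= 1u << bit;
        visit(bit);
      }
    }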
@@ -974,6 +976,167 @@
   }
 }
 
+void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      pc_relative_dex_cache_patches_.size() +
+      pc_relative_string_patches_.size() +
+      pc_relative_type_patches_.size() +
+      boot_image_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      boot_image_address_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    DCHECK(info.pc_rel_label.IsBound());
+    uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label);
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset,
+                                                              &dex_file,
+                                                              pc_rel_offset,
+                                                              base_element_offset));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t string_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
+                                                               &dex_file,
+                                                               pc_rel_offset,
+                                                               string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t type_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
+                                                             &dex_file,
+                                                             pc_rel_offset,
+                                                             type_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
+                                                     MethodToLiteralMap* map) {
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                              uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                            uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -982,7 +1145,7 @@
   __ LoadFromOffset(kLoadWord,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMipsWordSize>().Int32Value());
+                    Thread::CardTableOffset<kMipsPointerSize>().Int32Value());
   __ Srl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Addu(temp, card, temp);
   __ Sb(card, temp, 0);
@@ -1019,6 +1182,15 @@
     blocked_fpu_registers_[i] = true;
   }
 
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
+  }
+
   UpdateBlockedPairRegisters();
 }
 
@@ -1065,7 +1237,7 @@
                                       HInstruction* instruction,
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMipsWordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path,
@@ -1116,7 +1288,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMipsWordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMipsPointerSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnez(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1665,11 +1837,11 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1683,7 +1855,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1697,7 +1868,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1712,7 +1882,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1729,7 +1898,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1744,7 +1912,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Register out = locations->Out().AsRegisterPairLow<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1759,7 +1926,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1774,7 +1940,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1803,7 +1968,7 @@
 
 void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -1814,7 +1979,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2426,7 +2591,7 @@
 void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
   Primitive::Type type = div->GetResultType();
   LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3389,7 +3554,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3399,7 +3564,8 @@
     if (field_type == Primitive::kPrimLong) {
       locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
     } else {
-      locations->SetOut(Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetOut(Location::Any());
       // Need some temp core regs since FP results are returned in core registers
       Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
       locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
@@ -3464,11 +3630,23 @@
                             IsDirectEntrypoint(kQuickA64Load));
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
-      // Need to move to FP regs since FP results are returned in core registers.
-      __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->Out().AsFpuRegister<FRegister>());
-      __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                       locations->Out().AsFpuRegister<FRegister>());
+      // FP results are returned in core registers. Need to move them.
+      Location out = locations->Out();
+      if (out.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+        __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         out.AsFpuRegister<FRegister>());
+      } else {
+        DCHECK(out.IsDoubleStackSlot());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(1).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(2).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex() + 4);
+      }
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
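The Mtc1/MoveToFpuHigh pair (or the two word stores for a stack slot) above reassemble the 64-bit value that the A64Load entrypoint returns in a core register pair. In plain C++ the same reassembly looks roughly like this (illustration only; the low/high register-pair convention is the assumption here):

    #include <cstdint>
    #include <cstring>

    // Combine the two 32-bit halves returned in a core register pair into a double,
    // which is what Mtc1 (low half) followed by MoveToFpuHigh (high half) do in registers.
    inline double AssembleDouble(uint32_t lo, uint32_t hi) {
      uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
      double result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }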
@@ -3516,7 +3694,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3527,7 +3705,8 @@
       locations->SetInAt(1, Location::RegisterPairLocation(
           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
     } else {
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetInAt(1, Location::Any());
       // Pass FP parameters in core registers.
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
@@ -3586,10 +3765,28 @@
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
     if (type == Primitive::kPrimDouble) {
       // Pass FP parameters in core registers.
-      __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->InAt(1).AsFpuRegister<FRegister>());
-      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                         locations->InAt(1).AsFpuRegister<FRegister>());
+      Location in = locations->InAt(1);
+      if (in.IsFpuRegister()) {
+        __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+        __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                           in.AsFpuRegister<FRegister>());
+      } else if (in.IsDoubleStackSlot()) {
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex());
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(2).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex() + 4);
+      } else {
+        DCHECK(in.IsConstant());
+        DCHECK(in.GetConstant()->IsDoubleConstant());
+        int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+        __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->GetTemp(1).AsRegister<Register>(),
+                       value);
+      }
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -3655,6 +3852,23 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location root,
+    Register obj,
+    uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind =
       instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
@@ -3733,7 +3947,7 @@
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
@@ -3750,7 +3964,7 @@
   __ LoadFromOffset(kLoadWord, temp, temp,
       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
+      invoke->GetImtIndex(), kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3776,12 +3990,38 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = !isR6 &&
+      ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+       (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup));
+
+  if (invoke->HasPcRelativeDexCache()) {
+    // kDexCachePcRelative is mutually exclusive with
+    // kDirectAddressWithFixup/kCallDirectWithFixup.
+    CHECK(!has_extra_input);
+    has_extra_input = true;
+  }
+
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && has_extra_input) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // Add the extra input register if either the dex cache array base register
+  // or the PC-relative base register for accessing literals is needed.
+  if (has_extra_input) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
@@ -3794,47 +4034,179 @@
 }
 
 HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
-    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
-  // TODO: Implement other kinds.
-  return HLoadString::LoadKind::kDexCacheViaMethod;
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_string_load_kind;
+}
+
+HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_class_load_kind;
+}
+
+Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                  Register temp) {
+  CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path, we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
 }
 
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  switch (desired_dispatch_info.method_load_kind) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = true;
+  bool fallback_call = true;
+  switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0u
-      };
+      fallback_load = has_irreducible_loops;
+      break;
     default:
+      fallback_load = false;
       break;
   }
-  switch (desired_dispatch_info.code_ptr_location) {
+  switch (dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      fallback_call = has_irreducible_loops;
+      break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        desired_dispatch_info.method_load_data,
-        0u
-      };
+      // TODO: Implement this type.
+      break;
     default:
-      return desired_dispatch_info;
+      fallback_call = false;
+      break;
   }
+  if (fallback_load) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+    dispatch_info.method_load_data = 0;
+  }
+  if (fallback_call) {
+    dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    dispatch_info.direct_code_ptr = 0;
+  }
+  return dispatch_info;
 }
 
 void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
-  switch (invoke->GetMethodLoadKind()) {
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = isa_features_.IsR6();
+  // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = invoke->HasPcRelativeDexCache() ||
+      (!isR6 &&
+       ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+        (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)));
+  Register base_reg = has_extra_input
+      ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
+      : ZERO;
+
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  switch (code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst32(T9, invoke->GetDirectCodePtr());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 = code address from literal pool with link-time patch.
+      __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      break;
+    default:
+      break;
+  }
+
+  switch (method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ LoadFromOffset(kLoadWord,
@@ -3849,11 +4221,18 @@
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+      __ LoadLiteral(temp.AsRegister<Register>(),
+                     base_reg,
+                     DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -3884,20 +4263,19 @@
     }
   }
 
-  switch (invoke->GetCodePtrLocation()) {
+  switch (code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Bal(&frame_entry_label_);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // LR = invoke->GetDirectCodePtr();
-      __ LoadConst32(T9, invoke->GetDirectCodePtr());
-      // LR()
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 prepared above for better instruction scheduling.
+      // T9()
       __ Jalr(T9);
       __ Nop();
       break;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types.
+      // TODO: Implement this type.
       // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
@@ -3907,7 +4285,7 @@
                         T9,
                         callee_method.AsRegister<Register>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMipsWordSize).Int32Value());
+                            kMipsPointerSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3940,7 +4318,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
 
   // temp = object->GetClass();
   DCHECK(receiver.IsRegister());
@@ -3966,11 +4344,40 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(V0));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(V0),
+        /* code_generator_supports_read_barrier */ false);  // TODO: revisit this bool.
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
@@ -3986,40 +4393,132 @@
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-          cls,
-          cls,
-          cls->GetDexPc(),
-          cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ Beqz(out, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
+  bool generate_null_check = false;
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(cls,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                               cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
       } else {
-        __ Bind(slow_path->GetExitLabel());
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
       }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        base_or_current_method_reg,
+                        ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Beqz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMipsWordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) {
@@ -4042,28 +4541,97 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-
-  if (!load->IsInDexCache()) {
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
-    codegen_->AddSlowPath(slow_path);
-    __ Beqz(out, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
   }
+
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                 load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
+      } else {
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here; we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
+      }
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    default:
+      break;
+  }
+
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+  codegen_->AddSlowPath(slow_path);
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
@@ -4077,7 +4645,7 @@
 
 void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4256,7 +4824,7 @@
 
 void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -4271,7 +4839,7 @@
   // Move a uint16_t value to a register.
   __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(
-      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+      GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
       instruction->GetDexPc(),
       nullptr,
@@ -4282,7 +4850,7 @@
 
 void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4297,7 +4865,7 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
     __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);
@@ -4305,7 +4873,7 @@
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   } else {
     codegen_->InvokeRuntime(
-        GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+        GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
         instruction,
         instruction->GetDexPc(),
         nullptr,
@@ -4439,7 +5007,7 @@
 
 void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -4452,7 +5020,7 @@
 void LocationsBuilderMIPS::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4689,7 +5257,7 @@
 
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4718,7 +5286,7 @@
   if (!isR6 &&
       ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
        (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -4756,7 +5324,6 @@
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
-  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4765,7 +5332,9 @@
     Register dst_low = locations->Out().AsRegisterPairLow<Register>();
     Register src = locations->InAt(0).AsRegister<Register>();
 
-    __ Move(dst_low, src);
+    if (dst_low != src) {
+      __ Move(dst_low, src);
+    }
     __ Sra(dst_high, src, 31);
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
     Register dst = locations->Out().AsRegister<Register>();
@@ -4794,7 +5363,9 @@
         }
         break;
       case Primitive::kPrimInt:
-        __ Move(dst, src);
+        if (dst != src) {
+          __ Move(dst, src);
+        }
         break;
 
       default:
@@ -4951,11 +5522,7 @@
         uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
         __ LoadConst32(TMP, High32Bits(min_val));
         __ Mtc1(ZERO, FTMP);
-        if (fpu_32bit) {
-          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
-        } else {
-          __ Mthc1(TMP, FTMP);
-        }
+        __ MoveToFpuHigh(TMP, FTMP);
       }
 
       if (isR6) {
@@ -5169,6 +5736,59 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations = insn->GetLocations();
+  Register reg = locations->Out().AsRegister<Register>();
+
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+
+  // Generate a dummy PC-relative call to obtain PC.
+  __ Nal();
+  // Grab the return address off RA.
+  __ Move(reg, RA);
+  // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
+
+  // Remember this offset (the obtained PC value) for later use with the constant area.
+  __ BindPcRelBaseLabel();
+}
+
+void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  Register reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorMIPS::PcRelativePatchInfo* info =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+
+  if (codegen_->GetInstructionSetFeatures().IsR6()) {
+    __ Bind(&info->high_label);
+    __ Bind(&info->pc_rel_label);
+    // Add a 32-bit offset to PC.
+    __ Auipc(reg, /* placeholder */ 0x1234);
+    __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  } else {
+    // Generate a dummy PC-relative call to obtain PC.
+    __ Nal();
+    __ Bind(&info->high_label);
+    __ Lui(reg, /* placeholder */ 0x1234);
+    __ Bind(&info->pc_rel_label);
+    __ Ori(reg, reg, /* placeholder */ 0x5678);
+    // Add a 32-bit offset to PC.
+    __ Addu(reg, reg, RA);
+    // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
+  }
+}
+
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   // The trampoline uses the same calling convention as dex calling conventions,
   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
@@ -5198,7 +5818,7 @@
                       method_offset);
   } else {
     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+        instruction->GetIndex(), kMipsPointerSize));
     __ LoadFromOffset(kLoadWord,
                       locations->Out().AsRegister<Register>(),
                       locations->InAt(0).AsRegister<Register>(),
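
The kDexCacheAddress case of VisitLoadClass above splits an absolute dex cache address into a
lui-loaded upper half plus a signed 16-bit load offset, subtracting the sign-extended low half so
that the pair recombines exactly. A minimal standalone sketch of that arithmetic (illustrative
only, not part of the patch; the address value is made up):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t address = 0x7123ABCDu;                  // hypothetical GC root address
      int16_t offset = static_cast<int16_t>(address);  // Low16Bits(address), as the lw sign-extends it
      uint32_t base_address = address - offset;        // what High16Bits() hands to lui
      assert((base_address & 0xFFFFu) == 0u);          // the low half always cancels out
      assert(base_address + static_cast<int32_t>(offset) == address);
      return 0;
    }
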
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 435a869..63a0345 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,11 +18,12 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace mips {
@@ -226,6 +227,15 @@
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
   void GenerateIntCompareAndBranch(IfCondition cond,
                                    LocationSummary* locations,
@@ -262,6 +272,7 @@
                     OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorMIPS() {}
 
+  void ComputeSpillMask() OVERRIDE;
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
@@ -284,6 +295,9 @@
   MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
   const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
+  // Emit linker patches.
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   void MarkGCCard(Register object, Register value);
 
   // Register allocation.
@@ -294,6 +308,9 @@
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  void ClobberRA() {
+    clobbered_ra_ = true;
+  }
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -348,6 +365,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -366,7 +388,52 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings/types. The only difference is the interpretation of the offset_or_index.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string/type index.
+    uint32_t offset_or_index;
+    // Label for the instruction loading the most significant half of the offset that's added to PC
+    // to form the base address (the least significant half is loaded with the instruction that
+    // follows).
+    MipsLabel high_label;
+    // Label for the instruction corresponding to PC+0.
+    MipsLabel pc_rel_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
+
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
+  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
+  Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
+
   // Labels for each block that will be compiled.
   MipsLabel* block_labels_;
   MipsLabel frame_entry_label_;
@@ -376,6 +443,28 @@
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // PC-relative patch info for each HMipsDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
+
+  // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
+  // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
+  bool clobbered_ra_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
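
The PcRelativePatchInfo labels above mark the instruction pair whose /* placeholder */ 0x1234 and
0x5678 immediates are later replaced with the two halves of the final 32-bit PC-relative offset.
The R2 sequence (lui + ori) takes a plain high/low split, while the R6 sequence (auipc + addiu)
sign-extends its low half, so its high half needs the usual +0x8000 rounding. A minimal sketch of
the two splits (an assumption about the eventual patching, not the actual patcher code):

    #include <cassert>
    #include <cstdint>

    // R2: lui (high half) + ori (zero-extended low half).
    void SplitForLuiOri(uint32_t offset, uint16_t* hi, uint16_t* lo) {
      *hi = static_cast<uint16_t>(offset >> 16);
      *lo = static_cast<uint16_t>(offset & 0xFFFFu);
    }

    // R6: auipc (high half added to PC) + addiu (sign-extended low half).
    void SplitForAuipcAddiu(uint32_t offset, uint16_t* hi, uint16_t* lo) {
      *lo = static_cast<uint16_t>(offset & 0xFFFFu);
      *hi = static_cast<uint16_t>((offset + 0x8000u) >> 16);  // round up if bit 15 of lo is set
    }

    int main() {
      uint32_t offset = 0x0001ABCDu;  // hypothetical distance from the anchor PC
      uint16_t hi, lo;
      SplitForLuiOri(offset, &hi, &lo);
      assert(((static_cast<uint32_t>(hi) << 16) | lo) == offset);
      SplitForAuipcAddiu(offset, &hi, &lo);
      assert((static_cast<uint32_t>(hi) << 16) +
                 static_cast<uint32_t>(static_cast<int16_t>(lo)) == offset);
      return 0;
    }
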
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 9b405bb..4a5755c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -102,8 +102,9 @@
   return Mips64ReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
@@ -126,10 +127,14 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips64_codegen->InvokeRuntime(entry_point_offset,
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -295,13 +300,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ Bc(GetReturnLabel());
     } else {
@@ -357,7 +360,7 @@
                                     dex_pc,
                                     this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -424,8 +427,9 @@
 }
 
 #undef __
-#define __ down_cast<Mips64Assembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -882,7 +886,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMips64DoublewordSize>().Int32Value());
+                    Thread::CardTableOffset<kMips64PointerSize>().Int32Value());
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
@@ -916,13 +920,13 @@
 
   // TODO: review; anything else?
 
-  // TODO: remove once all the issues with register saving/restoring are sorted out.
-  for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-    blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-  }
-
-  for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-    blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
   }
 }
 
@@ -958,7 +962,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMips64DoublewordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -998,7 +1002,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMips64DoublewordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMips64PointerSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnezc(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1287,11 +1291,11 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1305,7 +1309,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1319,7 +1322,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1334,7 +1336,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1351,7 +1352,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
       if (index.IsConstant()) {
@@ -1367,7 +1367,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1382,7 +1381,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1397,7 +1395,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1426,7 +1423,7 @@
 
 void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -1437,7 +1434,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2935,7 +2932,7 @@
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -2952,7 +2949,7 @@
   __ LoadFromOffset(kLoadDoubleword, temp, temp,
       mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
+      invoke->GetImtIndex(), kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -2984,19 +2981,6 @@
   }
 
   HandleInvoke(invoke);
-
-  // While SetupBlockedRegisters() blocks registers S2-S8 due to their
-  // clobbering somewhere else, reduce further register pressure by avoiding
-  // allocation of a register for the current method pointer like on x86 baseline.
-  // TODO: remove this once all the issues with register saving/restoring are
-  // sorted out.
-  if (invoke->HasCurrentMethodInput()) {
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
@@ -3014,6 +2998,13 @@
   return HLoadString::LoadKind::kDexCacheViaMethod;
 }
 
+HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
+  // TODO: Implement other kinds.
+  return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
@@ -3122,7 +3113,7 @@
                         T9,
                         callee_method.AsRegister<GpuRegister>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMips64DoublewordSize).Int32Value());
+                            kMips64PointerSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3160,7 +3151,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
 
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
@@ -3238,7 +3229,7 @@
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -3270,22 +3261,11 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations = load->GetLocations();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
-  __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
-                    ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(
-      kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
-
-  if (!load->IsInDexCache()) {
-    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
-    codegen_->AddSlowPath(slow_path);
-    __ Beqzc(out, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
-  }
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+  codegen_->AddSlowPath(slow_path);
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
@@ -3299,7 +3279,7 @@
 
 void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3426,7 +3406,7 @@
 
 void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -3447,7 +3427,7 @@
 
 void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3463,7 +3443,7 @@
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
     MemberOffset code_offset =
-        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
     __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);
@@ -3594,7 +3574,7 @@
 
 void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -3607,7 +3587,8 @@
 void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -3820,7 +3801,7 @@
 
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
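
The MIPS64 hunks above replace the per-case mirror::Array::DataOffset(sizeof(...)) computations in VisitArrayGet with a single CodeGenerator::GetArrayDataOffset(instruction) call hoisted before the type switch, and VisitArrayLength now reads its offset through CodeGenerator::GetArrayLengthOffset(instruction). A minimal sketch of such helpers, keyed on the element type's component size, might look like the following (a simplification: the real helpers also have to cover cases such as String.length() being lowered to HArrayLength):

// Sketch only -- keyed on the HArrayGet result type; not the exact ART implementation.
uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) {
  Primitive::Type type = array_get->GetType();
  return mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
}

// Sketch only -- a plain array length; the instruction parameter is what lets the
// real code generator distinguish string-length loads.
uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length ATTRIBUTE_UNUSED) {
  return mirror::Array::LengthOffset().Uint32Value();
}
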
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9785a2e..197f86b 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -340,6 +339,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
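
Both the MIPS64 hunks above and the x86 hunks below drop the `% ImTable::kSize` when indexing the interface method table: ImTable::OffsetOfElement(invoke->GetImtIndex(), ...) now receives an index that has already been reduced to the table size where it is assigned. A hedged sketch of the offset computation this relies on (helper name and body are an assumption, for illustration only):

// Sketch: IMT entries are pointer-sized and laid out contiguously, so the
// entry offset is simply index * pointer size once the index is in range.
static size_t ImtEntryOffset(size_t imt_index, PointerSize pointer_size) {
  // imt_index is assumed to already satisfy imt_index < ImTable::kSize.
  return imt_index * static_cast<size_t>(pointer_size);
}
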
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 51d9b7c..f50eb5c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,8 +47,9 @@
 
 static constexpr int kFakeReturnRegister = Register(8);
 
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress the spurious misc-macro-parentheses warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
 
 class NullCheckSlowPathX86 : public SlowPathCode {
  public:
@@ -139,18 +140,39 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<Register>(), array_len);
+    }
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_codegen->InvokeRuntime(entry_point_offset,
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -170,13 +192,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -327,7 +347,7 @@
                                  instruction_->GetDexPc(),
                                  this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -425,8 +445,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
+      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -434,39 +454,56 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    if (unpoison_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(reg);
+    }
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
-    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, ESP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in EAX):
+    //
+    //   EAX <- obj
+    //   EAX <- ReadBarrierMark(EAX)
+    //   obj <- EAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
+  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
 };
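
The rewritten ReadBarrierMarkSlowPathX86 above no longer saves live registers or shuffles the reference through EAX; it jumps to a register-specific ReadBarrierMarkRegX entrypoint located via CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg). A hedged sketch of how such a per-register offset can be derived, assuming the mark entrypoints sit consecutively in the thread's entrypoint table starting at pReadBarrierMarkReg00 (the exact ART helper may differ):

// Sketch: one pointer-sized entrypoint slot per core register; register N's
// mark entrypoint is N slots past the first one.
template <PointerSize kPointerSize>
static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
  int32_t base = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pReadBarrierMarkReg00).Int32Value();
  return base + static_cast<int32_t>(static_cast<size_t>(kPointerSize) * reg);
}
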
@@ -506,9 +543,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()))
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -520,7 +560,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -568,7 +608,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(index_reg, Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -691,7 +735,8 @@
 };
 
 #undef __
-#define __ down_cast<X86Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress the spurious misc-macro-parentheses warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
 
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
@@ -762,7 +807,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kX86WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -777,6 +822,13 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                    const X86InstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options,
@@ -802,6 +854,7 @@
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_address_offset_(-1) {
@@ -1531,15 +1584,15 @@
   locations->SetOut(Location::SameAsFirstInput());
 }
 
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
   Register lhs_reg = lhs.AsRegister<Register>();
   if (rhs.IsConstant()) {
     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-    codegen_->Compare32BitValue(lhs_reg, value);
+    Compare32BitValue(lhs_reg, value);
   } else if (rhs.IsStackSlot()) {
-    __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+    assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
   } else {
-    __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+    assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
   }
 }
 
@@ -1572,7 +1625,7 @@
         DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
         DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
         LocationSummary* cond_locations = condition->GetLocations();
-        GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
         cond = X86Condition(condition->GetCondition());
       }
     } else {
@@ -1681,7 +1734,7 @@
 
       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-      GenerateIntCompare(lhs, rhs);
+      codegen_->GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -2041,11 +2094,11 @@
       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
+      invoke->GetImtIndex(), kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
-                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2168,7 +2221,7 @@
   LocationSummary::CallKind call_kind =
       ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
        && result_type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3306,17 +3359,6 @@
   int shift;
   CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
 
-  NearLabel ndiv;
-  NearLabel end;
-  // If numerator is 0, the result is 0, no computation needed.
-  __ testl(eax, eax);
-  __ j(kNotEqual, &ndiv);
-
-  __ xorl(out, out);
-  __ jmp(&end);
-
-  __ Bind(&ndiv);
-
   // Save the numerator.
   __ movl(num, eax);
 
@@ -3351,7 +3393,6 @@
   } else {
     __ movl(eax, edx);
   }
-  __ Bind(&end);
 }
 
 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
@@ -3435,7 +3476,7 @@
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
 
@@ -3538,7 +3579,7 @@
   Primitive::Type type = rem->GetResultType();
 
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
@@ -3980,7 +4021,7 @@
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3997,7 +4038,7 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
     __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
     __ call(Address(temp, code_offset.Int32Value()));
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -4013,7 +4054,7 @@
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -4075,7 +4116,7 @@
             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
   } else {
     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+        instruction->GetIndex(), kX86PointerSize));
     __ movl(locations->Out().AsRegister<Register>(),
             Address(locations->InAt(0).AsRegister<Register>(),
                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
@@ -4175,7 +4216,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
-      GenerateIntCompare(left, right);
+      codegen_->GenerateIntCompare(left, right);
       break;
     }
     case Primitive::kPrimLong: {
@@ -4247,7 +4288,7 @@
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -4274,8 +4315,10 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barrier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-Temporal Store/Store needs an explicit fence.
+      MemoryFence(/* non-temporal */ true);
+      break;
   }
 }
 
@@ -4326,16 +4369,18 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather than trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
-    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
-    __ movl(temp, Address(ESP, stack_offset));
-    return temp;
+  if (slow_path != nullptr) {
+    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+      __ movl(temp, Address(ESP, stack_offset));
+      return temp;
+    }
   }
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -4384,6 +4429,11 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -4405,7 +4455,7 @@
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<Register>(),
                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kX86WordSize).Int32Value()));
+                          kX86PointerSize).Int32Value()));
       break;
   }
 
@@ -4439,7 +4489,7 @@
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(
-      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 }
 
 void CodeGeneratorX86::RecordSimplePatch() {
@@ -4454,6 +4504,11 @@
   __ Bind(&string_patches_.back().label);
 }
 
+void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
 Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                          uint32_t element_offset) {
   // Add the patch entry and bind its label at the end of the instruction.
@@ -4468,7 +4523,8 @@
       relative_call_patches_.size() +
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
-      string_patches_.size();
+      string_patches_.size() +
+      type_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -4504,6 +4560,13 @@
                                                                  GetMethodAddressOffset(),
                                                                  info.string_index));
     }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                               &info.dex_file,
+                                                               GetMethodAddressOffset(),
+                                                               info.type_index));
+    }
   } else {
     for (const StringPatchInfo<Label>& info : string_patches_) {
       uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
@@ -4511,6 +4574,12 @@
                                                          &info.dex_file,
                                                          info.string_index));
     }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                       &info.dex_file,
+                                                       info.type_index));
+    }
   }
 }
 
@@ -4524,7 +4593,7 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+  __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
   __ movl(temp, object);
   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0),
@@ -4567,10 +4636,6 @@
     // load the temp into the XMM and then copy the XMM into the
     // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
-  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -4614,11 +4679,10 @@
     case Primitive::kPrimNot: {
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+            instruction, out, base, offset, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -5029,11 +5093,6 @@
             Location::kOutputOverlap :
             Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
-  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5042,11 +5101,11 @@
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
@@ -5058,7 +5117,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
@@ -5070,7 +5128,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
@@ -5082,7 +5139,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
@@ -5094,7 +5150,6 @@
     }
 
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
@@ -5109,15 +5164,13 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
       } else {
         Register out = out_loc.AsRegister<Register>();
         if (index.IsConstant()) {
@@ -5143,7 +5196,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -5161,7 +5213,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
@@ -5173,7 +5224,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
@@ -5493,12 +5543,18 @@
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ movl(out, Address(obj, offset));
@@ -5511,7 +5567,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5545,12 +5604,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    Register length = length_loc.AsRegister<Register>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<Register>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<Register>());
+      Register length = length_loc.AsRegister<Register>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<Register>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -5599,7 +5674,7 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
                 Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
@@ -5875,13 +5950,72 @@
   __ popl(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      FALLTHROUGH_INTENDED;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(EAX),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(EAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
@@ -5898,39 +6032,86 @@
 
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ movl(out, Address(current_method,
-                         ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ movl(out, Immediate(/* placeholder */ 0));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      Register method_address = locations->InAt(0).AsRegister<Register>();
+      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // /* GcRoot<mirror::Class> */ out = *address
+      GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      // /* GcRoot<mirror::Class> */ out = *(base + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ movl(out, Address(current_method,
+                           ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
 
-      if (!cls->IsInDexCache()) {
-        __ testl(out, out);
-        __ j(kEqual, slow_path->GetEntryLabel());
-      }
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
 
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
-      }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -6044,50 +6225,19 @@
       codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
-    case HLoadString::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(load->GetAddress(), 0u);
-      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
-      GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
-      break;
-    }
-    case HLoadString::LoadKind::kDexCachePcRelative: {
-      Register base_reg = locations->InAt(0).AsRegister<Register>();
-      uint32_t offset = load->GetDexCacheElementOffset();
-      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
-      GenerateGcRootFieldLoad(
-          load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCacheViaMethod: {
-      Register current_method = locations->InAt(0).AsRegister<Register>();
-
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      GenerateGcRootFieldLoad(
-          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-      __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-      // /* GcRoot<mirror::String> */ out = out[string_index]
-      GenerateGcRootFieldLoad(
-          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-      break;
-    }
     default:
-      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
-      UNREACHABLE();
+      break;
   }
 
-  if (!load->IsInDexCache()) {
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
-  }
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+  codegen_->AddSlowPath(slow_path);
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value());
+  return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
 }
 
 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
@@ -6110,7 +6260,7 @@
 
 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6125,8 +6275,8 @@
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
        type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
@@ -6187,7 +6337,7 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -6409,7 +6559,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6445,8 +6595,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6485,8 +6634,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6518,8 +6666,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6527,8 +6674,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6562,7 +6708,7 @@
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6719,17 +6865,17 @@
                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
-    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6746,17 +6892,15 @@
 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                     Location out,
                                                                     Location obj,
-                                                                    uint32_t offset,
-                                                                    Location maybe_temp) {
+                                                                    uint32_t offset) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6799,12 +6943,12 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
-      SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+      // Slow path marking the GC root `root`.
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+          instruction, root, /* unpoison */ false);
       codegen_->AddSlowPath(slow_path);
 
-      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
                     Immediate(0));
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
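+      // Note: when the GC is not actively marking, execution falls straight through here; the
+      // mark entrypoint in the slow path above is only invoked while a concurrent copying
+      // collection is in progress.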
@@ -6835,14 +6979,13 @@
                                                              Location ref,
                                                              Register obj,
                                                              uint32_t offset,
-                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6850,24 +6993,25 @@
                                                              Register obj,
                                                              uint32_t data_offset,
                                                              Location index,
-                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  Register obj,
                                                                  const Address& src,
-                                                                 Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6897,23 +7041,23 @@
   //   performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
-  Register temp_reg = temp.AsRegister<Register>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  __ movl(temp_reg, Address(obj, monitor_offset));
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
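+  // Illustrative note, assuming kReadBarrierStateShift == 28: gray_byte_position is then 3,
+  // gray_bit_position is 4, and the testb below checks bit 4 of the lock word's top byte
+  // against the immediate value 0x10.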
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // At this point, just do the "if" and make sure that flags are preserved until the branch.
+  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
-  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86 memory model.
@@ -6921,20 +7065,20 @@
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);
+  __ movl(ref_reg, src);  // Flags are unaffected.
+
+  // Note: Reference unpoisoning modifies the flags, so it is delayed until after the branch.
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+      instruction, ref, /* unpoison */ true);
+  AddSlowPath(slow_path);
+
+  // We have done the "if" of the gray bit check above, now branch based on the flags.
+  __ j(kNotZero, slow_path->GetEntryLabel());
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
-  __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1739eec..c644e40 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -18,8 +18,8 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 
 #include "arch/x86/instruction_set_features_x86.h"
+#include "base/enums.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -29,7 +29,7 @@
 namespace x86 {
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86WordSize = kX86PointerSize;
+static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);
 
 class CodeGeneratorX86;
 
@@ -254,8 +254,7 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset,
-                                         Location maybe_temp);
+                                         uint32_t offset);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -295,7 +294,6 @@
                                    HBasicBlock* default_block);
 
   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
-  void GenerateIntCompare(Location lhs, Location rhs);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -336,6 +334,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
   }
@@ -391,6 +395,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -398,12 +407,14 @@
       MethodReference target_method) OVERRIDE;
 
   // Generate a call to a static or direct method.
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void RecordSimplePatch();
   void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -418,6 +429,8 @@
                   Register value,
                   bool value_can_be_null);
 
+  void GenerateIntCompare(Location lhs, Location rhs);
+
   void GenerateMemoryBarrier(MemBarrierKind kind);
 
   Label* GetLabelOf(HBasicBlock* block) const {
@@ -473,7 +486,6 @@
                                              Location ref,
                                              Register obj,
                                              uint32_t offset,
-                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -482,8 +494,14 @@
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -538,7 +556,7 @@
   // touch (but not change) the top of the stack.
   // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
   void MemoryFence(bool non_temporal = false) {
-    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+    if (!non_temporal) {
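+      // Illustrative note: on x86 a locked add of zero to the top of the stack acts as a full
+      // barrier and is usually cheaper than mfence; mfence is kept for the non-temporal case,
+      // where weakly-ordered streaming stores still need an explicit fence.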
       assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
     } else {
       assembler_.mfence();
@@ -554,15 +572,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
@@ -593,6 +602,8 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations.
   ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 28b52a1..ec37e5d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,8 +51,9 @@
 
 static constexpr int kC2ConditionMask = 0x400;
 
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
  public:
@@ -148,13 +149,11 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, instruction_->GetLocations());
     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -193,20 +192,41 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+    }
+
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
-    InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_64_codegen->InvokeRuntime(entry_point_offset,
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
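+    // Note: CheckEntrypointTypes is a compile-time signature check and emits no code, so both
+    // the string-bounds and array-bounds variants can be stated regardless of which entrypoint
+    // was selected above.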
   }
 
@@ -347,7 +367,7 @@
                                     dex_pc,
                                     this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -446,8 +466,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
+      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -455,39 +475,56 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    if (unpoison_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
+    }
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, RSP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // in RDI and output in RAX):
+    //
+    //   RDI <- obj
+    //   RAX <- ReadBarrierMark(RDI)
+    //   obj <- RAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
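+    // Illustrative note, assuming the usual entrypoint table layout: the offset computed below
+    // is that of pReadBarrierMarkReg00 plus reg * kX86_64PointerSize, i.e. the per-register
+    // mark entrypoints form an array indexed by the register number.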
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
+  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
 };
@@ -527,9 +564,12 @@
     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()))
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -541,7 +581,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute real offset and store it in index_.
         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
@@ -589,7 +629,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -710,7 +754,8 @@
 };
 
 #undef __
-#define __ down_cast<X86_64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
 
 inline Condition X86_64IntegerCondition(IfCondition cond) {
   switch (cond) {
@@ -762,10 +807,9 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                     Location temp) {
   // All registers are assumed to be correctly set up.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -815,6 +859,13 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -836,7 +887,7 @@
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<CpuRegister>(),
                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kX86_64WordSize).SizeValue()));
+                          kX86_64PointerSize).SizeValue()));
       break;
   }
 
@@ -871,7 +922,7 @@
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+      kX86_64PointerSize).SizeValue()));
 }
 
 void CodeGeneratorX86_64::RecordSimplePatch() {
@@ -886,6 +937,11 @@
   __ Bind(&string_patches_.back().label);
 }
 
+void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                             uint32_t element_offset) {
   // Add a patch entry and return the label.
@@ -900,7 +956,8 @@
       relative_call_patches_.size() +
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
-      string_patches_.size();
+      string_patches_.size() +
+      type_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -936,6 +993,14 @@
                                                                info.label.Position(),
                                                                info.string_index));
   }
+  for (const TypePatchInfo<Label>& info : type_patches_) {
+    // These are always PC-relative, see GetSupportedLoadClassKind().
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                             &info.dex_file,
+                                                             info.label.Position(),
+                                                             info.type_index));
+  }
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -970,7 +1035,7 @@
                                         HInstruction* instruction,
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -985,6 +1050,13 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
 static constexpr int kNumberOfCpuRegisterPairs = 0;
 // Use a fake return address register to mimic Quick.
 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
@@ -1015,6 +1087,7 @@
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -2250,12 +2323,12 @@
       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
+      invoke->GetImtIndex(), kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp,
-                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
+  __ call(Address(
+      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -3394,16 +3467,6 @@
 
     __ movl(numerator, eax);
 
-    NearLabel no_div;
-    NearLabel end;
-    __ testl(eax, eax);
-    __ j(kNotEqual, &no_div);
-
-    __ xorl(out, out);
-    __ jmp(&end);
-
-    __ Bind(&no_div);
-
     __ movl(eax, Immediate(magic));
     __ imull(numerator);
 
@@ -3429,7 +3492,6 @@
     } else {
       __ movl(eax, edx);
     }
-    __ Bind(&end);
   } else {
     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
 
@@ -3887,7 +3949,7 @@
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3904,7 +3966,7 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
     __ call(Address(temp, code_offset.SizeValue()));
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -3920,7 +3982,7 @@
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -3987,7 +4049,7 @@
             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   } else {
     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+        instruction->GetIndex(), kX86_64PointerSize));
     __ movq(locations->Out().AsRegister<CpuRegister>(),
             Address(locations->InAt(0).AsRegister<CpuRegister>(),
             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
@@ -4040,7 +4102,7 @@
 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
@@ -4067,8 +4129,10 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-Temporal Store/Store needs an explicit fence.
+      MemoryFence(/* non_temporal */ true);
+      break;
   }
 }
 
@@ -4093,11 +4157,6 @@
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4141,11 +4200,10 @@
     case Primitive::kPrimNot: {
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+            instruction, out, base, offset, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -4529,11 +4587,6 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
-  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4542,11 +4595,11 @@
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
@@ -4558,7 +4611,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
@@ -4570,7 +4622,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
@@ -4582,7 +4633,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
@@ -4594,7 +4644,6 @@
     }
 
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
@@ -4609,15 +4658,13 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
       } else {
         CpuRegister out = out_loc.AsRegister<CpuRegister>();
         if (index.IsConstant()) {
@@ -4643,7 +4690,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
@@ -4655,7 +4701,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
@@ -4667,7 +4712,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
@@ -4968,12 +5012,18 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   __ movl(out, Address(obj, offset));
@@ -4986,7 +5036,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -4996,8 +5049,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -5020,12 +5072,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    CpuRegister length = length_loc.AsRegister<CpuRegister>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      CpuRegister length = length_loc.AsRegister<CpuRegister>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -5042,7 +5110,7 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
                                         /* no_rip */ true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
@@ -5094,7 +5162,7 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
                                   /* no_rip */ true),
                 Immediate(0));
   if (successor == nullptr) {
@@ -5319,13 +5387,64 @@
   // No need for memory fence, thanks to the x86-64 memory model.
 }
 
+HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      // We prefer the always-available RIP-relative address for the x86-64 boot image.
+      return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(RAX),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(RAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5342,37 +5461,86 @@
 
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ movq(out, Address(current_method,
-                         ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ testl(out, out);
-        __ j(kEqual, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+      codegen_->RecordTypePatch(cls);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));  // Zero-extended.
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // /* GcRoot<mirror::Class> */ out = *address
+      if (IsUint<32>(cls->GetAddress())) {
+        Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
+        GenerateGcRootFieldLoad(cls, out_loc, address);
       } else {
-        __ Bind(slow_path->GetExitLabel());
+        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
+        __ movq(out, Immediate(cls->GetAddress()));
+        GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0));
       }
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      __ movq(out,
+              Address(current_method,
+                      ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
+      UNREACHABLE();
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -5461,55 +5629,19 @@
       codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
-    case HLoadString::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(load->GetAddress(), 0u);
-      if (IsUint<32>(load->GetAddress())) {
-        Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
-        GenerateGcRootFieldLoad(load, out_loc, address);
-      } else {
-        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
-        __ movq(out, Immediate(load->GetAddress()));
-        GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
-      }
-      break;
-    }
-    case HLoadString::LoadKind::kDexCachePcRelative: {
-      uint32_t offset = load->GetDexCacheElementOffset();
-      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
-      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
-                                          /* no_rip */ false);
-      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
-      break;
-    }
-    case HLoadString::LoadKind::kDexCacheViaMethod: {
-      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      GenerateGcRootFieldLoad(
-          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-      __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-      // /* GcRoot<mirror::String> */ out = out[string_index]
-      GenerateGcRootFieldLoad(
-          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-      break;
-    }
     default:
-      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
-      UNREACHABLE();
+      break;
   }
 
-  if (!load->IsInDexCache()) {
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    __ Bind(slow_path->GetExitLabel());
-  }
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+  codegen_->AddSlowPath(slow_path);
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+  return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
                            /* no_rip */ true);
 }
 
@@ -5533,7 +5665,7 @@
 
 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5548,8 +5680,8 @@
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
        type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
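+// In other words, with Baker read barriers the extra temporary is no longer needed;
+// it is now reserved only when slow-path (non-Baker) read barriers are emitted for
+// the abstract-class, class-hierarchy and array-object check kinds.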
@@ -5610,7 +5742,7 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5840,8 +5972,7 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -5865,8 +5996,7 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
@@ -5886,8 +6016,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5911,8 +6040,7 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -5938,8 +6066,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
@@ -5958,8 +6085,7 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
@@ -5987,8 +6113,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5996,8 +6121,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
@@ -6013,8 +6137,7 @@
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // We always go into the type check slow path for the unresolved
       // and interface check cases.
@@ -6043,7 +6166,7 @@
 
 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6182,17 +6305,17 @@
                                                                       Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
-    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6209,17 +6332,15 @@
 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                        Location out,
                                                                        Location obj,
-                                                                       uint32_t offset,
-                                                                       Location maybe_temp) {
+                                                                       uint32_t offset) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6262,12 +6383,12 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
-      SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+      // Slow path marking the GC root `root`.
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+          instruction, root, /* unpoison */ false);
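+      // Note: GC roots are not poisoned heap references, hence /* unpoison */ false
+      // here, in contrast to the heap-reference load path further down.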
       codegen_->AddSlowPath(slow_path);
 
-      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
                                       /* no_rip */ true),
                     Immediate(0));
       __ j(kNotEqual, slow_path->GetEntryLabel());
@@ -6299,14 +6420,13 @@
                                                                 Location ref,
                                                                 CpuRegister obj,
                                                                 uint32_t offset,
-                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6314,24 +6434,25 @@
                                                                 CpuRegister obj,
                                                                 uint32_t data_offset,
                                                                 Location index,
-                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                     Location ref,
                                                                     CpuRegister obj,
                                                                     const Address& src,
-                                                                    Location temp,
                                                                     bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6361,23 +6482,23 @@
   //   performance reasons.
 
   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
-  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  __ movl(temp_reg, Address(obj, monitor_offset));
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
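+  // Worked example (purely illustrative; the real values come from LockWord): if
+  // kReadBarrierStateShift were 28, then gray_byte_position == 3,
+  // gray_bit_position == 4 and test_value == 0x10, so the testb below probes bit 4
+  // of the highest byte of the 32-bit lock word.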
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // At this point, just do the "if" and make sure that flags are preserved until the branch.
+  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
-  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6385,20 +6506,20 @@
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);
+  __ movl(ref_reg, src);  // Flags are unaffected.
+
+  // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+      instruction, ref, /* unpoison */ true);
+  AddSlowPath(slow_path);
+
+  // We have done the "if" of the gray bit check above, now branch based on the flags.
+  __ j(kNotZero, slow_path->GetEntryLabel());
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
-  __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3a211c5..44844ac 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -19,7 +19,6 @@
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -29,7 +28,7 @@
 namespace x86_64 {
 
 // Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
+static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);
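+// The explicit cast is presumably needed because kX86_64PointerSize is now a
+// strongly-typed PointerSize value rather than a plain size_t.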
 
 // Some x86_64 instructions require a register to be available as temp.
 static constexpr Register TMP = R11;
@@ -249,8 +248,7 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset,
-                                         Location maybe_temp);
+                                         uint32_t offset);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -318,6 +316,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
   }
@@ -388,17 +392,24 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void RecordSimplePatch();
   void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -415,7 +426,6 @@
                                              Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
-                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -424,8 +434,14 @@
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -509,10 +525,10 @@
 
   // Ensure that prior stores complete to memory before subsequent loads.
   // The locked add implementation will avoid serializing device memory, but will
-  // touch (but not change) the top of the stack. The locked add should not be used for
-  // ordering non-temporal stores.
+  // touch (but not change) the top of the stack.
+  // The 'force_mfence' parameter should be used to ensure ordering of non-temporal stores.
   void MemoryFence(bool force_mfence = false) {
-    if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+    if (!force_mfence) {
       assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
     } else {
       assembler_.mfence();
@@ -528,15 +544,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 CpuRegister obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
@@ -568,6 +575,8 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations.
   ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6be79fa..fe6c0a3 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,12 +29,6 @@
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "base/macros.h"
 #include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
 #include "code_simulator_container.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
@@ -44,7 +38,7 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils.h"
 #include "utils/arm/managed_register_arm.h"
@@ -52,10 +46,35 @@
 #include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
 #include "gtest/gtest.h"
 
 namespace art {
 
+#ifdef ART_ENABLE_CODEGEN_arm
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -80,7 +99,9 @@
     blocked_register_pairs_[arm::R6_R7] = false;
   }
 };
+#endif
 
+#ifdef ART_ENABLE_CODEGEN_x86
 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
  public:
   TestCodeGeneratorX86(HGraph* graph,
@@ -105,6 +126,7 @@
     blocked_register_pairs_[x86::ECX_EDI] = false;
   }
 };
+#endif
 
 class InternalCodeAllocator : public CodeAllocator {
  public:
@@ -219,7 +241,7 @@
 
   PrepareForRegisterAllocation(graph).Run();
   liveness.Analyze();
-  RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
@@ -234,37 +256,54 @@
                     bool has_result,
                     Expected expected) {
   CompilerOptions compiler_options;
+#ifdef ART_ENABLE_CODEGEN_arm
   if (target_isa == kArm || target_isa == kThumb2) {
     std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
         ArmInstructionSetFeatures::FromCppDefines());
     TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
     RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
-  } else if (target_isa == kArm64) {
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+  if (target_isa == kArm64) {
     std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
         Arm64InstructionSetFeatures::FromCppDefines());
     arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
     RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
-  } else if (target_isa == kX86) {
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+  if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
-    x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
+    TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
     RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
-  } else if (target_isa == kX86_64) {
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+  if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
     RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
-  } else if (target_isa == kMips) {
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+  if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
     RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
-  } else if (target_isa == kMips64) {
+  }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+  if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
     RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
   }
+#endif
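+  // Note: each ISA is now guarded by its own #ifdef, so a target_isa whose backend
+  // is not compiled in silently runs nothing here; presumably GetTargetISAs below
+  // only returns ISAs that are enabled in this build.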
 }
 
 static ::std::vector<InstructionSet> GetTargetISAs() {
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index a849448..cc949c5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -21,8 +21,14 @@
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#pragma GCC diagnostic pop
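+// The diagnostic push/ignored/pop above silences -Wshadow only for the VIXL
+// headers while keeping the warning active for ART's own code.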
 
 namespace art {
 namespace arm64 {
@@ -34,87 +40,88 @@
 
 static inline int VIXLRegCodeFromART(int code) {
   if (code == SP) {
-    return vixl::kSPRegInternalCode;
+    return vixl::aarch64::kSPRegInternalCode;
   }
   if (code == XZR) {
-    return vixl::kZeroRegCode;
+    return vixl::aarch64::kZeroRegCode;
   }
   return code;
 }
 
 static inline int ARTRegCodeFromVIXL(int code) {
-  if (code == vixl::kSPRegInternalCode) {
+  if (code == vixl::aarch64::kSPRegInternalCode) {
     return SP;
   }
-  if (code == vixl::kZeroRegCode) {
+  if (code == vixl::aarch64::kZeroRegCode) {
     return XZR;
   }
   return code;
 }
 
-static inline vixl::Register XRegisterFrom(Location location) {
+static inline vixl::aarch64::Register XRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
-  return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
+  return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register WRegisterFrom(Location location) {
+static inline vixl::aarch64::Register WRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
-  return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
+  return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
   DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
-static inline vixl::Register OutputRegister(HInstruction* instr) {
+static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::Register InputRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
   return RegisterFrom(instr->GetLocations()->InAt(input_index),
                       instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::FPRegister DRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
-  return vixl::FPRegister::DRegFromCode(location.reg());
+  return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister SRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
-  return vixl::FPRegister::SRegFromCode(location.reg());
+  return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
   DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
-static inline vixl::FPRegister OutputFPRegister(HInstruction* instr) {
+static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
   return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
   return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
                         instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
-  return Primitive::IsFloatingPointType(type) ? vixl::CPURegister(FPRegisterFrom(location, type))
-                                              : vixl::CPURegister(RegisterFrom(location, type));
+static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+  return Primitive::IsFloatingPointType(type)
+      ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
+      : vixl::aarch64::CPURegister(RegisterFrom(location, type));
 }
 
-static inline vixl::CPURegister OutputCPURegister(HInstruction* instr) {
+static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
   return Primitive::IsFloatingPointType(instr->GetType())
-      ? static_cast<vixl::CPURegister>(OutputFPRegister(instr))
-      : static_cast<vixl::CPURegister>(OutputRegister(instr));
+      ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
+      : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
 }
 
-static inline vixl::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
   return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
-      ? static_cast<vixl::CPURegister>(InputFPRegisterAt(instr, index))
-      : static_cast<vixl::CPURegister>(InputRegisterAt(instr, index));
+      ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
+      : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
 }
 
 static inline int64_t Int64ConstantFrom(Location location) {
@@ -129,63 +136,70 @@
   }
 }
 
-static inline vixl::Operand OperandFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
   if (location.IsRegister()) {
-    return vixl::Operand(RegisterFrom(location, type));
+    return vixl::aarch64::Operand(RegisterFrom(location, type));
   } else {
-    return vixl::Operand(Int64ConstantFrom(location));
+    return vixl::aarch64::Operand(Int64ConstantFrom(location));
   }
 }
 
-static inline vixl::Operand InputOperandAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
   return OperandFrom(instr->GetLocations()->InAt(input_index),
                      instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::MemOperand StackOperandFrom(Location location) {
-  return vixl::MemOperand(vixl::sp, location.GetStackIndex());
+static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+  return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, size_t offset = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    size_t offset = 0) {
   // A heap reference must be 32 bits, so it fits in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), offset);
+  return vixl::aarch64::MemOperand(base.X(), offset);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base,
-                                           const vixl::Register& regoffset,
-                                           vixl::Shift shift = vixl::LSL,
-                                           unsigned shift_amount = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    const vixl::aarch64::Register& regoffset,
+                                                    vixl::aarch64::Shift shift = vixl::aarch64::LSL,
+                                                    unsigned shift_amount = 0) {
   // A heap reference must be 32 bits, so it fits in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), regoffset, shift, shift_amount);
+  return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    Offset offset) {
   return HeapOperand(base, offset.SizeValue());
 }
 
-static inline vixl::MemOperand HeapOperandFrom(Location location, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
   return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
 }
 
-static inline Location LocationFrom(const vixl::Register& reg) {
-  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.code()));
+static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
 }
 
-static inline Location LocationFrom(const vixl::FPRegister& fpreg) {
-  return Location::FpuRegisterLocation(fpreg.code());
+static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+  return Location::FpuRegisterLocation(fpreg.GetCode());
 }
 
-static inline vixl::Operand OperandFromMemOperand(const vixl::MemOperand& mem_op) {
+static inline vixl::aarch64::Operand OperandFromMemOperand(
+    const vixl::aarch64::MemOperand& mem_op) {
   if (mem_op.IsImmediateOffset()) {
-    return vixl::Operand(mem_op.offset());
+    return vixl::aarch64::Operand(mem_op.GetOffset());
   } else {
     DCHECK(mem_op.IsRegisterOffset());
-    if (mem_op.extend() != vixl::NO_EXTEND) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.extend(), mem_op.shift_amount());
-    } else if (mem_op.shift() != vixl::NO_SHIFT) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.shift(), mem_op.shift_amount());
+    if (mem_op.GetExtend() != vixl::aarch64::NO_EXTEND) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetExtend(),
+                                    mem_op.GetShiftAmount());
+    } else if (mem_op.GetShift() != vixl::aarch64::NO_SHIFT) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetShift(),
+                                    mem_op.GetShiftAmount());
     } else {
       LOG(FATAL) << "Should not reach here";
       UNREACHABLE();
@@ -212,13 +226,13 @@
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
-    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
   } else if (instr->IsNeg()) {
     // Uses mov -immediate.
-    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
   } else {
     DCHECK(instr->IsAdd() ||
-           instr->IsArm64IntermediateAddress() ||
+           instr->IsIntermediateAddress() ||
            instr->IsBoundsCheck() ||
            instr->IsCompare() ||
            instr->IsCondition() ||
@@ -227,7 +241,8 @@
     // Uses aliases of ADD/SUB instructions.
     // If `value` does not fit but `-value` does, VIXL will automatically use
     // the 'opposite' instruction.
-    return vixl::Assembler::IsImmAddSub(value) || vixl::Assembler::IsImmAddSub(-value);
+    return vixl::aarch64::Assembler::IsImmAddSub(value)
+        || vixl::aarch64::Assembler::IsImmAddSub(-value);
   }
 }
 
@@ -263,30 +278,30 @@
   return true;
 }
 
-static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
-    case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
-    case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+    case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
-      return vixl::NO_SHIFT;
+      return vixl::aarch64::NO_SHIFT;
   }
 }
 
-static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
-    case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
-    case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
-    case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
-    case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
-    case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
-      return vixl::NO_EXTEND;
+      return vixl::aarch64::NO_EXTEND;
   }
 }
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 49cfff4..e1bde7c 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -88,13 +88,207 @@
   }
 }
 
-void HDeadCodeElimination::RemoveDeadBlocks() {
-  if (graph_->HasIrreducibleLoops()) {
-    // Do not eliminate dead blocks if the graph has irreducible loops. We could
-    // support it, but that would require changes in our loop representation to handle
-    // multiple entry points. We decided it was not worth the complexity.
-    return;
+void HDeadCodeElimination::MaybeRecordSimplifyIf() {
+  if (stats_ != nullptr) {
+    stats_->RecordStat(MethodCompilationStat::kSimplifyIf);
   }
+}
+
+static bool HasInput(HCondition* instruction, HInstruction* input) {
+  return (instruction->InputAt(0) == input) ||
+         (instruction->InputAt(1) == input);
+}
+
+static bool HasEquality(IfCondition condition) {
+  switch (condition) {
+    case kCondEQ:
+    case kCondLE:
+    case kCondGE:
+    case kCondBE:
+    case kCondAE:
+      return true;
+    case kCondNE:
+    case kCondLT:
+    case kCondGT:
+    case kCondB:
+    case kCondA:
+      return false;
+  }
+}
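+// Every IfCondition value is covered above, so the switch needs no default case;
+// a newly added condition kind would show up as a -Wswitch diagnostic here rather
+// than silently taking a fallback path.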
+
+static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstruction* right) {
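+  // Fold "x OP x" eagerly, but not for floating point: with a NaN input even
+  // x == x is false, so identical FP operands cannot be evaluated statically here.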
+  if (left == right && !Primitive::IsFloatingPointType(left->GetType())) {
+    return condition->GetBlock()->GetGraph()->GetIntConstant(
+        HasEquality(condition->GetCondition()) ? 1 : 0);
+  }
+
+  if (!left->IsConstant() || !right->IsConstant()) {
+    return nullptr;
+  }
+
+  if (left->IsIntConstant()) {
+    return condition->Evaluate(left->AsIntConstant(), right->AsIntConstant());
+  } else if (left->IsNullConstant()) {
+    return condition->Evaluate(left->AsNullConstant(), right->AsNullConstant());
+  } else if (left->IsLongConstant()) {
+    return condition->Evaluate(left->AsLongConstant(), right->AsLongConstant());
+  } else if (left->IsFloatConstant()) {
+    return condition->Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+  } else {
+    DCHECK(left->IsDoubleConstant());
+    return condition->Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
+  }
+}
+
+// Simplify the pattern:
+//
+//        B1    B2    ...
+//       goto  goto  goto
+//         \    |    /
+//          \   |   /
+//             B3
+//     i1 = phi(input, input)
+//     (i2 = condition on i1)
+//        if i1 (or i2)
+//          /     \
+//         /       \
+//        B4       B5
+//
+// Into:
+//
+//       B1      B2    ...
+//        |      |      |
+//       B4      B5    B?
+//
+// This simplification cannot be applied for loop headers, as they
+// contain a suspend check.
+//
+// Note that we rely on the dead code elimination to get rid of B3.
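+// A concrete instance (sketch): for code shaped like
+//   x = cond ? 1 : 0;
+//   if (x) { A } else { B }
+// the phi in B3 merges the constants 1 and 0, so each predecessor can be rewired
+// straight to A or B and B3 itself becomes dead.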
+bool HDeadCodeElimination::SimplifyIfs() {
+  bool simplified_one_or_more_ifs = false;
+  bool rerun_dominance_and_loop_analysis = false;
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    HInstruction* last = block->GetLastInstruction();
+    HInstruction* first = block->GetFirstInstruction();
+    if (last->IsIf() &&
+        block->HasSinglePhi() &&
+        block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+      bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
+      bool has_only_phi_condition_and_if =
+          !has_only_phi_and_if &&
+          first->IsCondition() &&
+          HasInput(first->AsCondition(), block->GetFirstPhi()) &&
+          (first->GetNext() == last) &&
+          (last->InputAt(0) == first) &&
+          first->HasOnlyOneNonEnvironmentUse();
+
+      if (has_only_phi_and_if || has_only_phi_condition_and_if) {
+        DCHECK(!block->IsLoopHeader());
+        HPhi* phi = block->GetFirstPhi()->AsPhi();
+        bool phi_input_is_left = (first->InputAt(0) == phi);
+
+        // Walk over all inputs of the phis and update the control flow of
+        // predecessors feeding constants to the phi.
+        // Note that phi->InputCount() may change inside the loop.
+        for (size_t i = 0; i < phi->InputCount();) {
+          HInstruction* input = phi->InputAt(i);
+          HInstruction* value_to_check = nullptr;
+          if (has_only_phi_and_if) {
+            if (input->IsIntConstant()) {
+              value_to_check = input;
+            }
+          } else {
+            DCHECK(has_only_phi_condition_and_if);
+            if (phi_input_is_left) {
+              value_to_check = Evaluate(first->AsCondition(), input, first->InputAt(1));
+            } else {
+              value_to_check = Evaluate(first->AsCondition(), first->InputAt(0), input);
+            }
+          }
+          if (value_to_check == nullptr) {
+            // Could not evaluate to a constant, continue iterating over the inputs.
+            ++i;
+          } else {
+            HBasicBlock* predecessor_to_update = block->GetPredecessors()[i];
+            HBasicBlock* successor_to_update = nullptr;
+            if (value_to_check->AsIntConstant()->IsTrue()) {
+              successor_to_update = last->AsIf()->IfTrueSuccessor();
+            } else {
+              DCHECK(value_to_check->AsIntConstant()->IsFalse())
+                  << value_to_check->AsIntConstant()->GetValue();
+              successor_to_update = last->AsIf()->IfFalseSuccessor();
+            }
+            predecessor_to_update->ReplaceSuccessor(block, successor_to_update);
+            phi->RemoveInputAt(i);
+            simplified_one_or_more_ifs = true;
+            if (block->IsInLoop()) {
+              rerun_dominance_and_loop_analysis = true;
+            }
+            // For simplicity, don't create a dead block, let the dead code elimination
+            // pass deal with it.
+            if (phi->InputCount() == 1) {
+              break;
+            }
+          }
+        }
+        if (block->GetPredecessors().size() == 1) {
+          phi->ReplaceWith(phi->InputAt(0));
+          block->RemovePhi(phi);
+          if (has_only_phi_condition_and_if) {
+            // Evaluate here (and not wait for a constant folding pass) to open
+            // more opportunities for DCE.
+            HInstruction* result = first->AsCondition()->TryStaticEvaluation();
+            if (result != nullptr) {
+              first->ReplaceWith(result);
+              block->RemoveInstruction(first);
+            }
+          }
+        }
+        if (simplified_one_or_more_ifs) {
+          MaybeRecordSimplifyIf();
+        }
+      }
+    }
+  }
+  // We need to re-analyze the graph in order to run DCE afterwards.
+  if (simplified_one_or_more_ifs) {
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      // We have introduced critical edges, remove them.
+      graph_->SimplifyCFG();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
+    }
+  }
+
+  return simplified_one_or_more_ifs;
+}
+
+void HDeadCodeElimination::ConnectSuccessiveBlocks() {
+  // Order does not matter.
+  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+    HBasicBlock* block  = it.Current();
+    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
+      it.Advance();
+      continue;
+    }
+    HBasicBlock* successor = block->GetSingleSuccessor();
+    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    block->MergeWith(successor);
+    // Reiterate on this block in case it can be merged with its new successor.
+  }
+}
+
+bool HDeadCodeElimination::RemoveDeadBlocks() {
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE);
@@ -132,23 +326,7 @@
       graph_->ComputeTryBlockInformation();
     }
   }
-
-  // Connect successive blocks created by dead branches. Order does not matter.
-  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
-    HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
-      it.Advance();
-      continue;
-    }
-    HBasicBlock* successor = block->GetSingleSuccessor();
-    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
-      it.Advance();
-      continue;
-    }
-    block->MergeWith(successor);
-
-    // Reiterate on this block in case it can be merged with its new successor.
-  }
+  return removed_one_or_more_blocks;
 }
 
 void HDeadCodeElimination::RemoveDeadInstructions() {
@@ -181,7 +359,20 @@
 }
 
 void HDeadCodeElimination::Run() {
-  RemoveDeadBlocks();
+  // Do not eliminate dead blocks if the graph has irreducible loops. We could
+  // support it, but that would require changes in our loop representation to handle
+  // multiple entry points. We decided it was not worth the complexity.
+  if (!graph_->HasIrreducibleLoops()) {
+    // Simplify graph to generate more dead block patterns.
+    ConnectSuccessiveBlocks();
+    bool did_any_simplification = false;
+    did_any_simplification |= SimplifyIfs();
+    did_any_simplification |= RemoveDeadBlocks();
+    if (did_any_simplification) {
+      // Connect successive blocks created by dead branches.
+      ConnectSuccessiveBlocks();
+    }
+  }
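+  // Note that redundant phi elimination and dead instruction removal below still
+  // run even when irreducible loops prevent the block-level simplifications above.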
   SsaRedundantPhiElimination(graph_).Run();
   RemoveDeadInstructions();
 }
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 8d6008b..58e700d 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,18 +31,19 @@
  public:
   HDeadCodeElimination(HGraph* graph,
                        OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kInitialDeadCodeEliminationPassName)
+                       const char* name = kDeadCodeEliminationPassName)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
-
-  static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
-  static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
+  static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
 
  private:
   void MaybeRecordDeadBlock(HBasicBlock* block);
-  void RemoveDeadBlocks();
+  void MaybeRecordSimplifyIf();
+  bool RemoveDeadBlocks();
   void RemoveDeadInstructions();
+  bool SimplifyIfs();
+  void ConnectSuccessiveBlocks();
 
   DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
 };
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index e9072b9..14c318e 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -44,8 +44,23 @@
   }
 
  private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache methods array,
+    // If this is a load with PC-relative access to the dex cache strings array,
     // we need to add the dex cache arrays base as the special input.
     if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
       // Initialize base for target dex file if needed.
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
index 015f910..9142e29 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.h
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -26,7 +26,9 @@
 class DexCacheArrayFixups : public HOptimization {
  public:
   DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+      : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats) {}
+
+  static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
 
   void Run() OVERRIDE;
 };
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
new file mode 100644
index 0000000..19bab08
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+#include "dex_cache_array_fixups_mips.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HMipsDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+    // Computing the dex cache base for PC-relative accesses will clobber RA with
+    // the NAL instruction on R2. Take a note of this before generating the method
+    // entry.
+    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+      codegen_->ClobberRA();
+    }
+  }
+
+ private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache strings array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_string->GetDexFile();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+      // Add the special argument base to the load.
+      load_string->AddSpecialInput(base);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the invoke.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    return dex_cache_array_bases_.GetOrCreate(
+        &dex_file,
+        [this, &dex_file]() {
+          HMipsDexCacheArraysBase* base =
+              new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file);
+          HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+          // Insert the base at the start of the entry block, move it to a better
+          // position later in MoveBasesIfNeeded().
+          entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+          return base;
+        });
+  }
+
+  CodeGeneratorMIPS* codegen_;
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
new file mode 100644
index 0000000..861a199
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 968e267..c8cba20 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -335,9 +335,7 @@
   }
 
   // Ensure the inputs of `instruction` are defined in a block of the graph.
-  for (HInputIterator input_it(instruction); !input_it.Done();
-       input_it.Advance()) {
-    HInstruction* input = input_it.Current();
+  for (HInstruction* input : instruction->GetInputs()) {
     const HInstructionList& list = input->IsPhi()
         ? input->GetBlock()->GetPhis()
         : input->GetBlock()->GetInstructions();
@@ -364,7 +362,8 @@
                             instruction->GetId()));
     }
     size_t use_index = use.GetIndex();
-    if ((use_index >= user->InputCount()) || (user->InputAt(use_index) != instruction)) {
+    HConstInputsRef user_inputs = user->GetInputs();
+    if ((use_index >= user_inputs.size()) || (user_inputs[use_index] != instruction)) {
       AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
                             user->DebugName(),
@@ -387,8 +386,9 @@
   }
 
   // Ensure 'instruction' has pointers to its inputs' use entries.
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
+  auto&& input_records = instruction->GetInputRecords();
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    const HUserRecord<HInstruction*>& input_record = input_records[i];
     HInstruction* input = input_record.GetInstruction();
     if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
         (input_record.GetUseNode() == input->GetUses().end()) ||
@@ -490,8 +490,7 @@
   VisitInstruction(invoke);
 
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HInstruction* last_input = invoke->InputAt(last_input_index);
+    const HInstruction* last_input = invoke->GetInputs().back();
     if (last_input == nullptr) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a null pointer as last input.",
@@ -665,24 +664,31 @@
   }
 }
 
-static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) {
+static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) {
   return insn1->IsConstant()
       && insn2->IsConstant()
       && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType());
 }
 
-static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
+static bool IsConstantEquivalent(const HInstruction* insn1,
+                                 const HInstruction* insn2,
+                                 BitVector* visited) {
   if (insn1->IsPhi() &&
-      insn1->AsPhi()->IsVRegEquivalentOf(insn2) &&
-      insn1->InputCount() == insn2->InputCount()) {
+      insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
+    HConstInputsRef insn1_inputs = insn1->GetInputs();
+    HConstInputsRef insn2_inputs = insn2->GetInputs();
+    if (insn1_inputs.size() != insn2_inputs.size()) {
+      return false;
+    }
+
     // Testing only one of the two inputs for recursion is sufficient.
     if (visited->IsBitSet(insn1->GetId())) {
       return true;
     }
     visited->SetBit(insn1->GetId());
 
-    for (size_t i = 0, e = insn1->InputCount(); i < e; ++i) {
-      if (!IsConstantEquivalent(insn1->InputAt(i), insn2->InputAt(i), visited)) {
+    for (size_t i = 0; i < insn1_inputs.size(); ++i) {
+      if (!IsConstantEquivalent(insn1_inputs[i], insn2_inputs[i], visited)) {
         return false;
       }
     }
@@ -698,15 +704,16 @@
   VisitInstruction(phi);
 
   // Ensure the first input of a phi is not itself.
-  if (phi->InputAt(0) == phi) {
+  ArrayRef<HUserRecord<HInstruction*>> input_records = phi->GetInputRecords();
+  if (input_records[0].GetInstruction() == phi) {
     AddError(StringPrintf("Loop phi %d in block %d is its own first input.",
                           phi->GetId(),
                           phi->GetBlock()->GetBlockId()));
   }
 
   // Ensure that the inputs have the same primitive kind as the phi.
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    HInstruction* input = input_records[i].GetInstruction();
     if (Primitive::PrimitiveKind(input->GetType()) != Primitive::PrimitiveKind(phi->GetType())) {
         AddError(StringPrintf(
             "Input %d at index %zu of phi %d from block %d does not have the "
@@ -729,8 +736,7 @@
     // because we do not remove the corresponding inputs when we prove that an
     // instruction cannot throw. Instead, we at least test that all phis have the
     // same, non-zero number of inputs (b/24054676).
-    size_t input_count_this = phi->InputCount();
-    if (input_count_this == 0u) {
+    if (input_records.empty()) {
       AddError(StringPrintf("Phi %d in catch block %d has zero inputs.",
                             phi->GetId(),
                             phi->GetBlock()->GetBlockId()));
@@ -738,12 +744,12 @@
       HInstruction* next_phi = phi->GetNext();
       if (next_phi != nullptr) {
         size_t input_count_next = next_phi->InputCount();
-        if (input_count_this != input_count_next) {
+        if (input_records.size() != input_count_next) {
           AddError(StringPrintf("Phi %d in catch block %d has %zu inputs, "
                                 "but phi %d has %zu inputs.",
                                 phi->GetId(),
                                 phi->GetBlock()->GetBlockId(),
-                                input_count_this,
+                                input_records.size(),
                                 next_phi->GetId(),
                                 input_count_next));
         }
@@ -753,17 +759,17 @@
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
     const ArenaVector<HBasicBlock*>& predecessors = phi->GetBlock()->GetPredecessors();
-    if (phi->InputCount() != predecessors.size()) {
+    if (input_records.size() != predecessors.size()) {
       AddError(StringPrintf(
           "Phi %d in block %d has %zu inputs, "
           "but block %d has %zu predecessors.",
-          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
+          phi->GetId(), phi->GetBlock()->GetBlockId(), input_records.size(),
           phi->GetBlock()->GetBlockId(), predecessors.size()));
     } else {
       // Ensure phi input at index I either comes from the Ith
       // predecessor or from a block that dominates this predecessor.
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
+      for (size_t i = 0; i < input_records.size(); ++i) {
+        HInstruction* input = input_records[i].GetInstruction();
         HBasicBlock* predecessor = predecessors[i];
         if (!(input->GetBlock() == predecessor
               || input->GetBlock()->Dominates(predecessor))) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 46db6e3..b3d5341 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -31,7 +31,7 @@
 #include "nodes.h"
 #include "optimization.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -122,7 +122,10 @@
             new DisassemblerOptions(/* absolute_addresses */ false,
                                     base_address,
                                     end_address,
-                                    /* can_read_literals */ true)));
+                                    /* can_read_literals */ true,
+                                    Is64BitInstructionSet(instruction_set)
+                                        ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                        : &Thread::DumpThreadOffset<PointerSize::k32>)));
   }
 
   ~HGraphVisualizerDisassembler() {
@@ -298,6 +301,12 @@
         stream << constant->AsIntConstant()->GetValue();
       } else if (constant->IsLongConstant()) {
         stream << constant->AsLongConstant()->GetValue();
+      } else if (constant->IsFloatConstant()) {
+        stream << constant->AsFloatConstant()->GetValue();
+      } else if (constant->IsDoubleConstant()) {
+        stream << constant->AsDoubleConstant()->GetValue();
+      } else if (constant->IsNullConstant()) {
+        stream << "null";
       }
     } else if (location.IsInvalid()) {
       stream << "invalid";
@@ -372,6 +381,10 @@
   }
 
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
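+    // Dump the load kind and the referenced class name in the visualizer output.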
+    StartAttributeStream("load_kind") << load_class->GetLoadKind();
+    const char* descriptor = load_class->GetDexFile().GetTypeDescriptor(
+        load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex()));
+    StartAttributeStream("class_name") << PrettyDescriptor(descriptor);
     StartAttributeStream("gen_clinit_check") << std::boolalpha
         << load_class->MustGenerateClinitCheck() << std::noboolalpha;
     StartAttributeStream("needs_access_check") << std::boolalpha
@@ -394,6 +407,24 @@
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
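+    // An HArrayLength may stand in for String.length(); expose that in the dump.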
+    StartAttributeStream("is_string_length") << std::boolalpha
+        << array_length->IsStringLength() << std::noboolalpha;
+    if (array_length->IsEmittedAtUseSite()) {
+      StartAttributeStream("emitted_at_use") << "true";
+    }
+  }
+
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << bounds_check->IsStringCharAt() << std::noboolalpha;
+  }
+
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << array_get->IsStringCharAt() << std::noboolalpha;
+  }
+
   void VisitArraySet(HArraySet* array_set) OVERRIDE {
     StartAttributeStream("value_can_be_null") << std::boolalpha
         << array_set->GetValueCanBeNull() << std::noboolalpha;
@@ -492,12 +523,13 @@
 
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    if (instruction->InputCount() > 0) {
-      StringList inputs;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-        inputs.NewEntryStream() << GetTypeId(it.Current()->GetType()) << it.Current()->GetId();
+    HConstInputsRef inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
+      StringList input_list;
+      for (const HInstruction* input : inputs) {
+        input_list.NewEntryStream() << GetTypeId(input->GetType()) << input->GetId();
       }
-      StartAttributeStream() << inputs;
+      StartAttributeStream() << input_list;
     }
     instruction->Accept(this);
     if (instruction->HasEnvironment()) {
@@ -539,12 +571,12 @@
       StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
-        StringList inputs;
-        for (size_t i = 0; i < instruction->InputCount(); ++i) {
-          DumpLocation(inputs.NewEntryStream(), locations->InAt(i));
+        StringList input_list;
+        for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+          DumpLocation(input_list.NewEntryStream(), locations->InAt(i));
         }
         std::ostream& attr = StartAttributeStream("locations");
-        attr << inputs << "->";
+        attr << input_list << "->";
         DumpLocation(attr, locations->Out());
       }
     }
@@ -734,8 +766,8 @@
       HInstruction* instruction = it.Current();
       output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType())
               << instruction->GetId() << "[ ";
-      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
-        output_ << inputs.Current()->GetId() << " ";
+      for (const HInstruction* input : instruction->GetInputs()) {
+        output_ << input->GetId() << " ";
       }
       output_ << "]\n";
     }
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index c06d19d..129c2a9 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -152,8 +152,8 @@
 
   // Visit all descendants.
   uint32_t low = d1;
-  for (size_t i = 0, count = instruction->InputCount(); i < count; ++i) {
-    low = std::min(low, VisitDescendant(loop, instruction->InputAt(i)));
+  for (HInstruction* input : instruction->GetInputs()) {
+    low = std::min(low, VisitDescendant(loop, input));
   }
 
   // Lower or found SCC?
@@ -341,11 +341,11 @@
                                                                          HInstruction* phi,
                                                                          size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index));
-  for (size_t i = input_index + 1; i < count; i++) {
-    InductionInfo* b = LookupInfo(loop, phi->InputAt(i));
+  HInputsRef inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  InductionInfo* a = LookupInfo(loop, inputs[input_index]);
+  for (size_t i = input_index + 1; i < inputs.size(); i++) {
+    InductionInfo* b = LookupInfo(loop, inputs[i]);
     if (!InductionEqual(a, b)) {
       return nullptr;
     }
@@ -464,12 +464,12 @@
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
                                                                       size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  auto ita = cycle_.find(phi->InputAt(input_index));
+  HInputsRef inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  auto ita = cycle_.find(inputs[input_index]);
   if (ita != cycle_.end()) {
-    for (size_t i = input_index + 1; i < count; i++) {
-      auto itb = cycle_.find(phi->InputAt(i));
+    for (size_t i = input_index + 1; i < inputs.size(); i++) {
+      auto itb = cycle_.find(inputs[i]);
       if (itb == cycle_.end() ||
           !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) {
         return nullptr;
@@ -670,7 +670,7 @@
   //     an unsigned entity, for example, as in the following loop that uses the full range:
   //     for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX
   // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in:
-  //     for (int i = 12; i < U; i++) // TC = 0 when U < 12
+  //     for (int i = 12; i < U; i++) // TC = 0 when U <= 12
   //     If this cannot be determined at compile-time, the TC is only valid within the
   //     loop-body proper, not the loop-header unless enforced with an explicit taken-test.
   // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in:
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index f1965f0..cd4c830 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -39,9 +39,9 @@
 
   void Run() OVERRIDE;
 
- private:
   static constexpr const char* kInductionPassName = "induction_var_analysis";
 
+ private:
   struct NodeInfo {
     explicit NodeInfo(uint32_t d) : depth(d), done(false) {}
     uint32_t depth;
@@ -132,7 +132,7 @@
                                  InductionInfo* a,
                                  InductionInfo* b,
                                  Primitive::Type type) {
-    DCHECK(a != nullptr);
+    DCHECK(a != nullptr && b != nullptr);
     return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type);
   }
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index bc920d9..5e587e0 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -73,9 +73,12 @@
   return v;
 }
 
-/**
- * Corrects a value for type to account for arithmetic wrap-around in lower precision.
- */
+/** Helper method to test for a constant value. */
+static bool IsConstantValue(InductionVarRange::Value v) {
+  return v.is_known && v.a_constant == 0;
+}
+
+/** Corrects a value for type to account for arithmetic wrap-around in lower precision. */
 static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimShort:
@@ -85,26 +88,15 @@
       // TODO: maybe some room for improvement, like allowing widening conversions
       const int32_t min = Primitive::MinValueOfIntegralType(type);
       const int32_t max = Primitive::MaxValueOfIntegralType(type);
-      return (v.is_known && v.a_constant == 0 && min <= v.b_constant && v.b_constant <= max)
+      return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max)
           ? v
           : InductionVarRange::Value();
     }
     default:
-      // At int or higher.
       return v;
   }
 }
 
-/** Helper method to test for a constant value. */
-static bool IsConstantValue(InductionVarRange::Value v) {
-  return v.is_known && v.a_constant == 0;
-}
-
-/** Helper method to test for same constant value. */
-static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) {
-  return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant;
-}
-
 /** Helper method to insert an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
@@ -119,22 +111,22 @@
 //
 
 InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
-    : induction_analysis_(induction_analysis) {
+    : induction_analysis_(induction_analysis),
+      chase_hint_(nullptr) {
   DCHECK(induction_analysis != nullptr);
 }
 
 bool InductionVarRange::GetInductionRange(HInstruction* context,
                                           HInstruction* instruction,
+                                          HInstruction* chase_hint,
                                           /*out*/Value* min_val,
                                           /*out*/Value* max_val,
                                           /*out*/bool* needs_finite_test) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return false;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-  if (info == nullptr) {
-    return false;  // no induction information
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip)) {
+    return false;
   }
   // Type int or lower (this is not too restrictive since intended clients, like
   // bounds check elimination, will have truncated higher precision induction
@@ -148,45 +140,15 @@
     default:
       return false;
   }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = context->GetBlock() != header;
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
   // Find range.
+  chase_hint_ = chase_hint;
+  bool in_body = context->GetBlock() != loop->GetHeader();
   *min_val = GetVal(info, trip, in_body, /* is_min */ true);
   *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
   *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
   return true;
 }
 
-bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val,
-                                    /*in-out*/ Value* max_val) const {
-  if (min_val->instruction != nullptr || max_val->instruction != nullptr) {
-    Value v1_min = RefineOuter(*min_val, /* is_min */ true);
-    Value v2_max = RefineOuter(*max_val, /* is_min */ false);
-    // The refined range is safe if both sides refine the same instruction. Otherwise, since two
-    // different ranges are combined, the new refined range is safe to pass back to the client if
-    // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur.
-    if (min_val->instruction != max_val->instruction) {
-      Value v1_max = RefineOuter(*min_val, /* is_min */ false);
-      Value v2_min = RefineOuter(*max_val, /* is_min */ true);
-      if (!IsConstantValue(v1_max) ||
-          !IsConstantValue(v2_min) ||
-          v1_max.b_constant > v2_min.b_constant) {
-        return false;
-      }
-    }
-    // Did something change?
-    if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) {
-      *min_val = v1_min;
-      *max_val = v2_max;
-      return true;
-    }
-  }
-  return false;
-}
-
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -226,7 +188,7 @@
 
 bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
                                    ConstantRequest request,
-                                   /*out*/ int64_t *value) const {
+                                   /*out*/ int64_t* value) const {
   if (info != nullptr) {
     // A direct 32-bit or 64-bit constant fetch. This immediately satisfies
     // any of the three requests (kExact, kAtMost, and KAtLeast).
@@ -236,27 +198,16 @@
         return true;
       }
     }
-    // Try range analysis while traversing outward on loops.
-    bool in_body = true;  // no known trip count
-    Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true);
-    Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false);
-    do {
-      // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies.
-      if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) {
-        if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) {
-          *value = v_max.b_constant;
-          return true;
-        } else if (request == kAtLeast) {
-          *value = v_min.b_constant;
-          return true;
-        }
-      }
-    } while (RefineOuter(&v_min, &v_max));
-    // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies
-    // (e.g. array length == maxint and c == 1 would yield minint).
-    if (request == kAtLeast) {
-      if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) {
-        *value = v_min.b_constant;
+    // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies.
+    Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
+    Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
+    if (IsConstantValue(min_val) &&
+        IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) {
+      if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) {
+        *value = max_val.b_constant;
+        return true;
+      } else if (request == kAtLeast) {
+        *value = min_val.b_constant;
         return true;
       }
     }
@@ -264,6 +215,51 @@
   return false;
 }
 
+bool InductionVarRange::HasInductionInfo(
+    HInstruction* context,
+    HInstruction* instruction,
+    /*out*/ HLoopInformation** loop,
+    /*out*/ HInductionVarAnalysis::InductionInfo** info,
+    /*out*/ HInductionVarAnalysis::InductionInfo** trip) const {
+  HLoopInformation* l = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (l != nullptr) {
+    HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction);
+    if (i != nullptr) {
+      *loop = l;
+      *info = i;
+      *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    // Both bounds that define a trip-count are well-behaved if they either are not defined
+    // in any loop, or are contained in a proper interval. This allows finding the min/max
+    // of an expression by chasing outward.
+    InductionVarRange range(induction_analysis_);
+    HInductionVarAnalysis::InductionInfo* lower = trip->op_b->op_a;
+    HInductionVarAnalysis::InductionInfo* upper = trip->op_b->op_b;
+    int64_t not_used = 0;
+    return (!HasFetchInLoop(lower) || range.IsConstant(lower, kAtLeast, &not_used)) &&
+           (!HasFetchInLoop(upper) || range.IsConstant(upper, kAtLeast, &not_used));
+  }
+  return true;
+}
+
+bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+        info->operation == HInductionVarAnalysis::kFetch) {
+      return info->fetch->GetBlock()->GetLoopInformation() != nullptr;
+    }
+    return HasFetchInLoop(info->op_a) || HasFetchInLoop(info->op_b);
+  }
+  return false;
+}
+
 bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
   if (info != nullptr) {
     if (info->induction_class == HInductionVarAnalysis::kLinear) {
@@ -299,13 +295,13 @@
                                                       HInductionVarAnalysis::InductionInfo* trip,
                                                       bool in_body,
                                                       bool is_min) const {
-  // Detect common situation where an offset inside the trip count cancels out during range
+  // Detect common situation where an offset inside the trip-count cancels out during range
   // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET or finding
   // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information
   // with intermediate results that only incorporate single instructions.
   if (trip != nullptr) {
     HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a;
-    if (trip_expr->operation == HInductionVarAnalysis::kSub) {
+    if (trip_expr->type == info->type && trip_expr->operation == HInductionVarAnalysis::kSub) {
       int64_t stride_value = 0;
       if (IsConstant(info->op_a, kExact, &stride_value)) {
         if (!is_min && stride_value == 1) {
@@ -349,12 +345,25 @@
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
                                                      bool is_min) const {
-  // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
-  // more likely range analysis will compare the same instructions as terminal nodes.
+  // Stop chasing the instruction at constant or hint.
   int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value))  {
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
     return Value(static_cast<int32_t>(value));
-  } else if (instruction->IsAdd()) {
+  } else if (instruction == chase_hint_) {
+    return Value(instruction, 1, 0);
+  }
+  // Special cases when encountering a single instruction that denotes the trip count in the
+  // loop-body: the min is 1 and, when chasing constants, the max of a safe trip-count is max int.
+  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+    if (is_min) {
+      return Value(1);
+    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+      return Value(std::numeric_limits<int32_t>::max());
+    }
+  }
+  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
+  // range analysis will compare the same instructions as terminal nodes.
+  if (instruction->IsAdd()) {
     if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
@@ -362,19 +371,35 @@
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
                       Value(static_cast<int32_t>(value)));
     }
-  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
-    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
+  } else if (instruction->IsArrayLength()) {
+    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    if (chase_hint_ == nullptr) {
+      return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
+    } else if (instruction->InputAt(0)->IsNewArray()) {
+      return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
+    }
   } else if (instruction->IsTypeConversion()) {
     // Since analysis is 32-bit (or narrower) we allow a widening along the path.
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
     }
-  } else if (is_min) {
-    // Special case for finding minimum: minimum of trip-count in loop-body is 1.
-    if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
-      return Value(1);
-    }
+  }
+  // Chase an invariant fetch that is defined by an outer loop if the trip-count used
+  // so far is well-behaved in both bounds and the next trip-count is safe.
+  // Example:
+  //   for (int i = 0; i <= 100; i++)  // safe
+  //     for (int j = 0; j <= i; j++)  // well-behaved
+  //       j is in range [0, i  ] (if i is chase hint)
+  //         or in range [0, 100] (otherwise)
+  HLoopInformation* next_loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_info = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_trip = nullptr;
+  bool next_in_body = true;  // inner loop is always in body of outer loop
+  if (HasInductionInfo(instruction, instruction, &next_loop, &next_info, &next_trip) &&
+      IsWellBehavedTripCount(trip) &&
+      !IsUnsafeTripCount(next_trip)) {
+    return GetVal(next_info, next_trip, next_in_body, is_min);
   }
   return Value(instruction, 1, 0);
 }
@@ -421,9 +446,8 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear: {
+      case HInductionVarAnalysis::kLinear:
         return CorrectForType(GetLinear(info, trip, in_body, is_min), info->type);
-      }
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
         return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
@@ -438,20 +462,18 @@
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
                                                    bool is_min) const {
+  // Constant times range.
+  int64_t value = 0;
+  if (IsConstant(info1, kExact, &value)) {
+    return MulRangeAndConstant(value, info2, trip, in_body, is_min);
+  } else if (IsConstant(info2, kExact, &value)) {
+    return MulRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  // Try to refine first operand.
-  if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) {
-    RefineOuter(&v1_min, &v1_max);
-  }
-  // Constant times range.
-  if (IsSameConstantValue(v1_min, v1_max)) {
-    return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min);
-  } else if (IsSameConstantValue(v2_min, v2_max)) {
-    return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min);
-  }
   // Positive range vs. positive or negative range.
   if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
     if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -476,14 +498,16 @@
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
                                                    bool is_min) const {
+  // Range divided by constant.
+  int64_t value = 0;
+  if (IsConstant(info2, kExact, &value)) {
+    return DivRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  // Range divided by constant.
-  if (IsSameConstantValue(v2_min, v2_max)) {
-    return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min);
-  }
   // Positive range vs. positive or negative range.
   if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
     if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -503,18 +527,30 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min,
-                                                                Value v_max,
-                                                                Value c,
-                                                                bool is_min) const {
-  return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c);
+InductionVarRange::Value InductionVarRange::MulRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
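+    // A negative constant flips the extreme: the minimum of the product requires the
+    // maximum of the range and vice versa, hence the adjusted is_min argument below.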
+    return MulValue(GetVal(info, trip, in_body, is_min == (value >= 0)), c);
+  }
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min,
-                                                                Value v_max,
-                                                                Value c,
-                                                                bool is_min) const {
-  return is_min == (c.b_constant >= 0) ? DivValue(v_min, c) : DivValue(v_max, c);
+InductionVarRange::Value InductionVarRange::DivRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
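+    // As with multiplication, a negative divisor flips which extreme of the range is requested.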
+    return DivValue(GetVal(info, trip, in_body, is_min == (value >= 0)), c);
+  }
+  return Value();
 }
 
 InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
@@ -580,28 +616,6 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const {
-  if (v.instruction == nullptr) {
-    return v;  // nothing to refine
-  }
-  HLoopInformation* loop =
-      v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return v;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction);
-  if (info == nullptr) {
-    return v;  // no induction information
-  }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = true;  // inner always in more outer
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-  // Try to refine "a x instruction + b" with outer loop range information on instruction.
-  return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant));
-}
-
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
@@ -611,27 +625,18 @@
                                      /*out*/HInstruction** taken_test,
                                      /*out*/bool* needs_finite_test,
                                      /*out*/bool* needs_taken_test) const {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return false;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-  if (info == nullptr) {
-    return false;  // no induction information
-  }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = context->GetBlock() != header;
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-  if (trip == nullptr) {
-    return false;  // codegen relies on trip count
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip) || trip == nullptr) {
+    return false;  // codegen needs all information, including the trip count
   }
   // Determine what tests are needed. A finite test is needed if the evaluation code uses the
   // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
   // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
   // code does not use the trip-count explicitly (since there could be an implicit relation
   // between e.g. an invariant subscript and a not-taken condition).
+  bool in_body = context->GetBlock() != loop->GetHeader();
   *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
   *needs_taken_test = IsBodyTripCount(trip);
   // Code generation for taken test: generate the code when requested or otherwise analyze
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0af4156..00aaa16 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,21 +57,19 @@
   explicit InductionVarRange(HInductionVarAnalysis* induction);
 
   /**
-   * Given a context denoted by the first instruction, returns a possibly conservative
-   * lower and upper bound on the instruction's value in the output parameters min_val
-   * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test
-   * is needed to protect the range evaluation inside its loop. Returns false on failure.
+   * Given a context denoted by the first instruction, returns a possibly conservative lower
+   * and upper bound on the instruction's value in the output parameters min_val and max_val,
+   * respectively. The need_finite_test flag denotes if an additional finite-test is needed
+   * to protect the range evaluation inside its loop. The parameter chase_hint defines an
+   * instruction at which chasing may stop. Returns false on failure.
    */
   bool GetInductionRange(HInstruction* context,
                          HInstruction* instruction,
+                         HInstruction* chase_hint,
                          /*out*/ Value* min_val,
                          /*out*/ Value* max_val,
                          /*out*/ bool* needs_finite_test);
 
-  /** Refines the values with induction of next outer loop. Returns true on change. */
-  bool RefineOuter(/*in-out*/ Value* min_val,
-                   /*in-out*/ Value* max_val) const;
-
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -132,11 +130,20 @@
    */
   bool IsConstant(HInductionVarAnalysis::InductionInfo* info,
                   ConstantRequest request,
-                  /*out*/ int64_t *value) const;
+                  /*out*/ int64_t* value) const;
 
+  /** Returns whether induction information can be obtained. */
+  bool HasInductionInfo(HInstruction* context,
+                        HInstruction* instruction,
+                        /*out*/ HLoopInformation** loop,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** info,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** trip) const;
+
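+  /** Returns whether the induction info contains a fetch of a value defined inside a loop. */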
+  bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const;
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
   bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
   bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
 
   Value GetLinear(HInductionVarAnalysis::InductionInfo* info,
                   HInductionVarAnalysis::InductionInfo* trip,
@@ -161,8 +168,16 @@
                bool in_body,
                bool is_min) const;
 
-  Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const;
-  Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const;
+  Value MulRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
+  Value DivRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
 
   Value AddValue(Value v1, Value v2) const;
   Value SubValue(Value v1, Value v2) const;
@@ -171,12 +186,6 @@
   Value MergeVal(Value v1, Value v2, bool is_min) const;
 
   /**
-   * Returns refined value using induction of next outer loop or the input value if no
-   * further refinement is possible.
-   */
-  Value RefineOuter(Value val, bool is_min) const;
-
-  /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
    * With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
@@ -200,7 +209,10 @@
                     bool is_min) const;
 
   /** Results of prior induction variable analysis. */
-  HInductionVarAnalysis *induction_analysis_;
+  HInductionVarAnalysis* induction_analysis_;
+
+  /** Instruction at which chasing may stop. */
+  HInstruction* chase_hint_;
 
   friend class HInductionVarAnalysis;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index dc04dc2..4ea170f 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -66,6 +66,8 @@
     entry_block_->AddInstruction(x_);
     y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
     entry_block_->AddInstruction(y_);
+    // Set arbitrary range analysis hint while testing private methods.
+    SetHint(x_);
   }
 
   /** Constructs loop with given upper bound. */
@@ -111,6 +113,11 @@
     iva_->Run();
   }
 
+  /** Sets hint. */
+  void SetHint(HInstruction* hint) {
+    range_.chase_hint_ = hint;
+  }
+
   /** Constructs an invariant. */
   HInductionVarAnalysis::InductionInfo* CreateInvariant(char opc,
                                                         HInductionVarAnalysis::InductionInfo* a,
@@ -122,6 +129,7 @@
       case 'n': op = HInductionVarAnalysis::kNeg; break;
       case '*': op = HInductionVarAnalysis::kMul; break;
       case '/': op = HInductionVarAnalysis::kDiv; break;
+      case '<': op = HInductionVarAnalysis::kLT;  break;
       default:  op = HInductionVarAnalysis::kNop; break;
     }
     return iva_->CreateInvariantOp(op, a, b);
@@ -137,22 +145,21 @@
     return CreateFetch(graph_->GetIntConstant(c));
   }
 
-  /** Constructs a trip-count. */
+  /** Constructs a constant trip-count. */
   HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
-    Primitive::Type type = Primitive::kPrimInt;
+    HInductionVarAnalysis::InductionOp op = HInductionVarAnalysis::kTripCountInBodyUnsafe;
     if (in_loop && safe) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInLoop;
     } else if (in_loop) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInLoopUnsafe;
     } else if (safe) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr, type);
-    } else {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInBody;
     }
+    // Return TC with taken-test 0 < TC.
+    return iva_->CreateTripCount(op,
+                                 CreateConst(tc),
+                                 CreateInvariant('<', CreateConst(0), CreateConst(tc)),
+                                 Primitive::kPrimInt);
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -197,13 +204,13 @@
   }
 
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
@@ -558,6 +565,31 @@
   ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50)));
 }
 
+TEST_F(InductionVarRangeTest, ArrayLengthAndHints) {
+  HInstruction* new_array = new (&allocator_)
+      HNewArray(x_,
+                graph_->GetCurrentMethod(),
+                0, Primitive::kPrimInt,
+                graph_->GetDexFile(),
+                kQuickAllocArray);
+  entry_block_->AddInstruction(new_array);
+  HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0);
+  entry_block_->AddInstruction(array_length);
+  // With null hint: yields extreme constants.
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+  SetHint(nullptr);
+  ExpectEqual(Value(0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(max_value), GetMax(CreateFetch(array_length), nullptr));
+  // With explicit hint: yields the length instruction.
+  SetHint(array_length);
+  ExpectEqual(Value(array_length, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(array_length, 1, 0), GetMax(CreateFetch(array_length), nullptr));
+  // With any non-null hint: chases beyond the length instruction.
+  SetHint(x_);
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr));
+}
+
 //
 // Tests on public methods.
 //
@@ -570,23 +602,20 @@
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -597,23 +626,20 @@
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -625,23 +651,20 @@
   bool needs_taken_test = true;
 
   // In context of header: upper unknown.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(x_, 1, -1), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(x_, 1, 0), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
@@ -695,23 +718,20 @@
   bool needs_taken_test = true;
 
   // In context of header: lower unknown.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(x_, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(x_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index c8a983b..451aa38 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -17,6 +17,7 @@
 #include "inliner.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "builder.h"
 #include "class_linker.h"
 #include "constant_folding.h"
@@ -35,7 +36,7 @@
 #include "nodes.h"
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "quick/inline_method_analyser.h"
 #include "sharpening.h"
 #include "ssa_builder.h"
@@ -151,7 +152,7 @@
   }
 
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = cl->GetImagePointerSize();
+  PointerSize pointer_size = cl->GetImagePointerSize();
   if (invoke->IsInvokeInterface()) {
     resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface(
         resolved_method, pointer_size);
@@ -185,6 +186,17 @@
   }
 }
 
+static uint32_t FindMethodIndexIn(ArtMethod* method,
+                                  const DexFile& dex_file,
+                                  uint32_t name_and_signature_index)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
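+  // Finds the dex method index of `method` in `dex_file`. If `method` belongs to a
+  // different dex file, the lookup is done by name and signature (using the given
+  // method index as the reference) and may yield DexFile::kDexNoIndex.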
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
+    return method->GetDexMethodIndex();
+  } else {
+    return method->FindDexMethodIndexInOtherDexFile(dex_file, name_and_signature_index);
+  }
+}
+
 static uint32_t FindClassIndexIn(mirror::Class* cls,
                                  const DexFile& dex_file,
                                  Handle<mirror::DexCache> dex_cache)
@@ -232,7 +244,7 @@
 
   ~ScopedProfilingInfoInlineUse() {
     if (profiling_info_ != nullptr) {
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
       Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
     }
@@ -379,7 +391,7 @@
   }
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   if (invoke_instruction->IsInvokeInterface()) {
     resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
         resolved_method, pointer_size);
@@ -471,7 +483,7 @@
   }
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
 
   bool all_targets_inlined = true;
@@ -633,7 +645,7 @@
     return false;
   }
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
 
   DCHECK(resolved_method != nullptr);
   ArtMethod* actual_method = nullptr;
@@ -650,7 +662,7 @@
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
       new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
-          method_index % ImTable::kSize, pointer_size);
+          method_index, pointer_size);
       if (new_method->IsRuntimeMethod()) {
+        // Bail out as soon as we see a conflict trampoline in one of the
+        // targets' interface tables.
@@ -743,13 +755,54 @@
 bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
   HInstruction* return_replacement = nullptr;
   if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
-    return false;
+    if (invoke_instruction->IsInvokeInterface()) {
+      // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
+      // better than an invoke-interface because:
+      // 1) In the best case, the interface call has one more indirection (to fetch the IMT).
+      // 2) We will not go to the conflict trampoline with an invoke-virtual.
+      // TODO: Consider sharpening once it is not dependent on the compiler driver.
+      const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+      uint32_t method_index = FindMethodIndexIn(
+          method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+      if (method_index == DexFile::kDexNoIndex) {
+        return false;
+      }
+      HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual(
+          graph_->GetArena(),
+          invoke_instruction->GetNumberOfArguments(),
+          invoke_instruction->GetType(),
+          invoke_instruction->GetDexPc(),
+          method_index,
+          method->GetMethodIndex());
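+      // Copy the arguments and environment of the interface call to the new virtual
+      // call, as well as its reference type info when the return type is a reference.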
+      HInputsRef inputs = invoke_instruction->GetInputs();
+      for (size_t index = 0; index != inputs.size(); ++index) {
+        new_invoke->SetArgumentAt(index, inputs[index]);
+      }
+      invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
+      new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+      if (invoke_instruction->GetType() == Primitive::kPrimNot) {
+        new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+      }
+      return_replacement = new_invoke;
+    } else {
+      // TODO: Consider sharpening an invoke virtual once it is not dependent on the
+      // compiler driver.
+      return false;
+    }
   }
   if (return_replacement != nullptr) {
     invoke_instruction->ReplaceWith(return_replacement);
   }
   invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  FixUpReturnReferenceType(method, return_replacement);
+  if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
+    // Actual return value has a more specific type than the method's declared
+    // return type. Run RTP again on the outer graph to propagate it.
+    ReferenceTypePropagation(graph_,
+                             outer_compilation_unit_.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
   return true;
 }
 
@@ -952,7 +1005,7 @@
           invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
 
           // Check whether the field is final. If it is, we need to add a barrier.
-          size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+          PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
           ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
           DCHECK(resolved_field != nullptr);
           if (resolved_field->IsFinal()) {
@@ -978,7 +1031,7 @@
                                                     uint32_t field_index,
                                                     HInstruction* obj)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
   ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
   DCHECK(resolved_field != nullptr);
   HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
@@ -1006,7 +1059,7 @@
                                                     HInstruction* obj,
                                                     HInstruction* value)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
   ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
   DCHECK(resolved_field != nullptr);
   HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
@@ -1144,6 +1197,15 @@
     }
   }
 
+  // We have replaced formal arguments with actual arguments. If actual types
+  // are more specific than the declared ones, run RTP again on the inner graph.
+  if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
+    ReferenceTypePropagation(callee_graph,
+                             dex_compilation_unit.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
+
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   size_t number_of_inlined_instructions =
       RunOptimizations(callee_graph, code_item, dex_compilation_unit);
@@ -1207,14 +1269,6 @@
         return false;
       }
 
-      if (current->IsInvokeInterface()) {
-        // Disable inlining of interface calls. The cost in case of entering the
-        // resolution conflict is currently too high.
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                       << " could not be inlined because it has an interface call.";
-        return false;
-      }
-
       if (!same_dex_file && current->NeedsEnvironment()) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because " << current->DebugName()
@@ -1317,51 +1371,96 @@
   return number_of_inlined_instructions;
 }
 
-void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                        ArtMethod* resolved_method,
-                                        HInstruction* return_replacement,
-                                        bool do_rtp) {
+static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
+                                      bool declared_can_be_null,
+                                      HInstruction* actual_obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
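+  // A refinement exists if the actual object is known non-null where the declared
+  // type can be null, if its type info is exact while the declared one is not, or
+  // if its type is a strict subtype of the declared type.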
+  if (declared_can_be_null && !actual_obj->CanBeNull()) {
+    return true;
+  }
+
+  ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+  return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
+         declared_rti.IsStrictSupertypeOf(actual_rti);
+}
+
+ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
+  return ReferenceTypePropagation::IsAdmissible(klass)
+      ? ReferenceTypeInfo::Create(handles_->NewHandle(klass))
+      : graph_->GetInexactObjectRti();
+}
+
+bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) {
+  // If this is an instance call, test whether the type of the `this` argument
+  // is more specific than the class which declares the method.
+  if (!resolved_method->IsStatic()) {
+    if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
+                                  /* declared_can_be_null */ false,
+                                  invoke_instruction->InputAt(0u))) {
+      return true;
+    }
+  }
+
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+
+  // Iterate over the list of parameter types and test whether any of the
+  // actual inputs has a more specific reference type than the type declared in
+  // the signature.
+  const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
+  for (size_t param_idx = 0,
+              input_idx = resolved_method->IsStatic() ? 0 : 1,
+              e = (param_list == nullptr ? 0 : param_list->Size());
+       param_idx < e;
+       ++param_idx, ++input_idx) {
+    HInstruction* input = invoke_instruction->InputAt(input_idx);
+    if (input->GetType() == Primitive::kPrimNot) {
+      mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType(
+          param_list->GetTypeItem(param_idx).type_idx_,
+          pointer_size);
+      if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
+                                    /* declared_can_be_null */ true,
+                                    input)) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
+                                      HInstruction* return_replacement) {
   // Check the integrity of reference types and run another type propagation if needed.
   if (return_replacement != nullptr) {
     if (return_replacement->GetType() == Primitive::kPrimNot) {
+      // Test if the return type is a refinement of the declared return type.
+      if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
+                                    /* declared_can_be_null */ true,
+                                    return_replacement)) {
+        return true;
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      // Inlining InstanceOf into an If may put a tighter bound on reference types.
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
+                                        HInstruction* return_replacement) {
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
       if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
         // Make sure that we have a valid type for the return. We may get an invalid one when
         // we inline invokes with multiple branches and create a Phi for the result.
         // TODO: we could be more precise by merging the phi inputs but that requires
         // some functionality from the reference type propagation.
         DCHECK(return_replacement->IsPhi());
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
-        if (cls != nullptr && !cls->IsErroneous()) {
-          ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls);
-          return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-              return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
-        } else {
-          // Return inexact object type on failures.
-          return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti());
-        }
-      }
-
-      if (do_rtp) {
-        // If the return type is a refinement of the declared type run the type propagation again.
-        ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-        ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-        if (invoke_rti.IsStrictSupertypeOf(return_rti)
-            || (return_rti.IsExact() && !invoke_rti.IsExact())
-            || !return_replacement->CanBeNull()) {
-          ReferenceTypePropagation(graph_,
-                                   outer_compilation_unit_.GetDexCache(),
-                                   handles_,
-                                   /* is_first_run */ false).Run();
-        }
-      }
-    } else if (return_replacement->IsInstanceOf()) {
-      if (do_rtp) {
-        // Inlining InstanceOf into an If may put a tighter bound on reference types.
-        ReferenceTypePropagation(graph_,
-                                 outer_compilation_unit_.GetDexCache(),
-                                 handles_,
-                                 /* is_first_run */ false).Run();
+        return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
       }
     }
   }
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7cf1424..02d3a5f 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -124,10 +124,18 @@
                                            uint32_t dex_pc) const
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                ArtMethod* resolved_method,
-                                HInstruction* return_replacement,
-                                bool do_rtp)
+  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
+  // admissible (see ReferenceTypePropagation::IsAdmissible for details).
+  // Otherwise returns inexact Object RTI.
+  ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add a type guard on the given `receiver`. This will add to the graph:
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 93081b9..e5dab56 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,8 +16,10 @@
 
 #include "instruction_builder.h"
 
+#include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
+#include "dex_instruction-inl.h"
 #include "driver/compiler_options.h"
 #include "scoped_thread_state_change.h"
 
@@ -381,10 +383,11 @@
   // If the operation requests a specific type, we make sure its input is of that type.
   if (type != value->GetType()) {
     if (Primitive::IsFloatingPointType(type)) {
-      return ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+      value = ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
     } else if (type == Primitive::kPrimNot) {
-      return ssa_builder_->GetReferenceTypeEquivalent(value);
+      value = ssa_builder_->GetReferenceTypeEquivalent(value);
     }
+    DCHECK(value != nullptr);
   }
 
   return value;
@@ -832,7 +835,8 @@
                         register_index,
                         is_range,
                         descriptor,
-                        nullptr /* clinit_check */);
+                        nullptr, /* clinit_check */
+                        true /* is_unresolved */);
   }
 
   // Potential class initialization check, in the case of a static method call.
@@ -888,7 +892,7 @@
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           resolved_method->GetDexMethodIndex());
+                                           resolved_method->GetImtIndex());
   }
 
   return HandleInvoke(invoke,
@@ -897,7 +901,8 @@
                       register_index,
                       is_range,
                       descriptor,
-                      clinit_check);
+                      clinit_check,
+                      false /* is_unresolved */);
 }
 
 bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
@@ -909,12 +914,12 @@
   Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
 
   bool finalizable;
-  bool can_throw = NeedsAccessCheck(type_index, dex_cache, &finalizable);
+  bool needs_access_check = NeedsAccessCheck(type_index, dex_cache, &finalizable);
 
   // Only the non-resolved entrypoint handles the finalizable class case. If we
   // need access checks, then we haven't resolved the method and the class may
   // again be finalizable.
-  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+  QuickEntrypointEnum entrypoint = (finalizable || needs_access_check)
       ? kQuickAllocObject
       : kQuickAllocObjectInitialized;
 
@@ -929,8 +934,8 @@
       outer_dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      /*needs_access_check*/ can_throw,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, type_index));
+      needs_access_check,
+      /* is_in_dex_cache */ false);
 
   AppendInstruction(load_class);
   HInstruction* cls = load_class;
@@ -945,7 +950,7 @@
       dex_pc,
       type_index,
       *dex_compilation_unit_->GetDexFile(),
-      can_throw,
+      needs_access_check,
       finalizable,
       entrypoint));
   return true;
@@ -1021,7 +1026,7 @@
         is_outer_class,
         dex_pc,
         /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index));
+        /* is_in_dex_cache */ false);
     AppendInstruction(load_class);
     clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
     AppendInstruction(clinit_check);
@@ -1090,14 +1095,17 @@
                                        uint32_t register_index,
                                        bool is_range,
                                        const char* descriptor,
-                                       HClinitCheck* clinit_check) {
+                                       HClinitCheck* clinit_check,
+                                       bool is_unresolved) {
   DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
 
   size_t start_index = 0;
   size_t argument_index = 0;
   if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0],
-                                             invoke->GetDexPc());
+    uint32_t obj_reg = is_range ? register_index : args[0];
+    HInstruction* arg = is_unresolved
+        ? LoadLocal(obj_reg, Primitive::kPrimNot)
+        : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc());
     invoke->SetArgumentAt(0, arg);
     start_index = 1;
     argument_index = 1;
@@ -1204,7 +1212,12 @@
       compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
 
 
-  HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc);
+  // Generate an explicit null check on the reference, unless the field access
+  // is unresolved. In that case, we rely on the runtime to perform various
+  // checks first, followed by a null check.
+  HInstruction* object = (resolved_field == nullptr)
+      ? LoadLocal(obj_reg, Primitive::kPrimNot)
+      : LoadNullCheckedLocal(obj_reg, dex_pc);
 
   Primitive::Type field_type = (resolved_field == nullptr)
       ? GetFieldAccessType(*dex_file_, field_index)
@@ -1365,15 +1378,13 @@
     }
   }
 
-  bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
                                                  outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
                                                  /*needs_access_check*/ false,
-                                                 is_in_cache);
+                                                 /* is_in_dex_cache */ false);
   AppendInstruction(constant);
 
   HInstruction* cls = constant;
@@ -1642,7 +1653,7 @@
       IsOutermostCompilingClass(type_index),
       dex_pc,
       !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index));
+      /* is_in_dex_cache */ false);
   AppendInstruction(cls);
 
   TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
@@ -2610,8 +2621,6 @@
       Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
       bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
           dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index);
-      bool is_in_dex_cache =
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index);
       AppendInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
@@ -2619,7 +2628,7 @@
           IsOutermostCompilingClass(type_index),
           dex_pc,
           !can_access,
-          is_in_dex_cache));
+          /* is_in_dex_cache */ false));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 0e3e5a7..517cf76 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -30,6 +30,8 @@
 
 namespace art {
 
+class Instruction;
+
 class HInstructionBuilder : public ValueObject {
  public:
   HInstructionBuilder(HGraph* graph,
@@ -237,7 +239,8 @@
                     uint32_t register_index,
                     bool is_range,
                     const char* descriptor,
-                    HClinitCheck* clinit_check);
+                    HClinitCheck* clinit_check,
+                    bool is_unresolved);
 
   bool HandleStringInit(HInvoke* invoke,
                         uint32_t number_of_vreg_arguments,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index d7b3856..4ca0600 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -54,6 +54,9 @@
   // De Morgan's laws:
   // ~a & ~b = ~(a | b)  and  ~a | ~b = ~(a & b)
   bool TryDeMorganNegationFactoring(HBinaryOperation* op);
+  bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
+  bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+
   void VisitShift(HBinaryOperation* shift);
 
   void VisitEqual(HEqual* equal) OVERRIDE;
@@ -101,6 +104,8 @@
   void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type);
   void SimplifyIsNaN(HInvoke* invoke);
   void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyStringCharAt(HInvoke* invoke);
+  void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
@@ -234,21 +239,40 @@
 
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
-  HConstant* input_cst = instruction->GetConstantRight();
-  HInstruction* input_other = instruction->GetLeastConstantLeft();
+  HInstruction* shift_amount = instruction->GetRight();
+  HInstruction* value = instruction->GetLeft();
 
-  if (input_cst != nullptr) {
-    int64_t cst = Int64FromConstant(input_cst);
-    int64_t mask = (input_other->GetType() == Primitive::kPrimLong)
-        ? kMaxLongShiftDistance
-        : kMaxIntShiftDistance;
-    if ((cst & mask) == 0) {
+  int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong)
+      ? kMaxLongShiftDistance
+      : kMaxIntShiftDistance;
+
+  if (shift_amount->IsConstant()) {
+    int64_t cst = Int64FromConstant(shift_amount->AsConstant());
+    if ((cst & implicit_mask) == 0) {
       // Replace code looking like
-      //    SHL dst, src, 0
+      //    SHL dst, value, 0
       // with
-      //    src
-      instruction->ReplaceWith(input_other);
+      //    value
+      instruction->ReplaceWith(value);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Shift operations implicitly mask the shift amount according to the type width. Get rid of
+  // unnecessary explicit masking operations on the shift amount.
+  // Replace code looking like
+  //    AND masked_shift, shift, <superset of implicit mask>
+  //    SHL dst, value, masked_shift
+  // with
+  //    SHL dst, value, shift
+  if (shift_amount->IsAnd()) {
+    HAnd* and_insn = shift_amount->AsAnd();
+    HConstant* mask = and_insn->GetConstantRight();
+    if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) {
+      instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1);
+      RecordSimplification();
     }
   }
 }
@@ -277,6 +301,7 @@
   if (!shl->GetRight()->HasUses()) {
     shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
   }
+  RecordSimplification();
   return true;
 }
 
@@ -895,6 +920,7 @@
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
+  bool integral_type = Primitive::IsIntegralType(instruction->GetType());
   if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    ADD dst, src, 0
@@ -903,9 +929,10 @@
     // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When
     // `x` is `-0.0`, the former expression yields `0.0`, while the later
     // yields `-0.0`.
-    if (Primitive::IsIntegralType(instruction->GetType())) {
+    if (integral_type) {
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
       return;
     }
   }
@@ -940,7 +967,39 @@
     return;
   }
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
+
+  if ((left->IsSub() || right->IsSub()) &&
+      TrySubtractionChainSimplification(instruction)) {
+    return;
+  }
+
+  if (integral_type) {
+    // Replace code patterns looking like
+    //    SUB dst1, x, y        SUB dst1, x, y
+    //    ADD dst2, dst1, y     ADD dst2, y, dst1
+    // with
+    //    SUB dst1, x, y
+    // The ADD instruction is not needed in this case; we can use
+    // one of the inputs of the SUB instead.
+    if (left->IsSub() && left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (right->IsSub() && right->InputAt(1) == left) {
+      instruction->ReplaceWith(right->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -998,10 +1057,17 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
-  TryDeMorganNegationFactoring(instruction);
+  if (TryDeMorganNegationFactoring(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
@@ -1115,6 +1181,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1175,6 +1242,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1215,6 +1283,8 @@
       //    0
       instruction->ReplaceWith(input_cst);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor)) {
       // Replace code looking like
       //    MUL dst, src, pow_of_2
@@ -1224,6 +1294,7 @@
       HShl* shl = new (allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor - 1)) {
       // Transform code looking like
       //    MUL dst, src, (2^n + 1)
@@ -1238,6 +1309,7 @@
       block->InsertInstructionBefore(shl, instruction);
       block->ReplaceAndRemoveInstructionWith(instruction, add);
       RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor + 1)) {
       // Transform code looking like
       //    MUL dst, src, (2^n - 1)
@@ -1252,8 +1324,13 @@
       block->InsertInstructionBefore(shl, instruction);
       block->ReplaceAndRemoveInstructionWith(instruction, sub);
       RecordSimplification();
+      return;
     }
   }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
@@ -1333,6 +1410,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1346,12 +1424,19 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
   if (TryDeMorganNegationFactoring(instruction)) return;
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1381,6 +1466,7 @@
     // yields `-0.0`.
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1441,6 +1527,34 @@
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
     left->GetBlock()->RemoveInstruction(left);
+    return;
+  }
+
+  if (TrySubtractionChainSimplification(instruction)) {
+    return;
+  }
+
+  if (left->IsAdd()) {
+    // Replace code patterns looking like
+    //    ADD dst1, x, y        ADD dst1, x, y
+    //    SUB dst2, dst1, y     SUB dst2, dst1, x
+    // with
+    //    ADD dst1, x, y
+    // The SUB instruction is not needed in this case; we can use
+    // one of the inputs of the ADD instead.
+    // This is applicable to integral types only.
+    DCHECK(Primitive::IsIntegralType(type));
+    if (left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (left->InputAt(0) == right) {
+      instruction->ReplaceWith(left->InputAt(1));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
   }
 }
 
@@ -1459,6 +1573,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1493,7 +1608,13 @@
     return;
   }
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
@@ -1538,7 +1659,7 @@
   HRor* ror = new (GetGraph()->GetArena()) HRor(type, value, distance);
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
   // Remove ClinitCheck and LoadClass, if possible.
-  HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+  HInstruction* clinit = invoke->GetInputs().back();
   if (clinit->IsClinitCheck() && !clinit->HasUses()) {
     clinit->GetBlock()->RemoveInstruction(clinit);
     HInstruction* ldclass = clinit->InputAt(0);
@@ -1673,6 +1794,46 @@
   invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
 }
 
+void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  HInstruction* index = invoke->InputAt(1);
+  uint32_t dex_pc = invoke->GetDexPc();
+  ArenaAllocator* arena = GetGraph()->GetArena();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength, HBoundsCheck and HArrayGet.
+  HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true);
+  invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+  HBoundsCheck* bounds_check =
+      new (arena) HBoundsCheck(index, length, dex_pc, invoke->GetDexMethodIndex());
+  invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
+  HArrayGet* array_get =
+      new (arena) HArrayGet(str, index, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get);
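+  // The bounds check can throw, so it needs the environment of the original call.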
+  bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment());
+  GetGraph()->SetHasBoundsChecks(true);
+}
+
+void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  uint32_t dex_pc = invoke->GetDexPc();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength.
+  HArrayLength* length =
+      new (GetGraph()->GetArena()) HArrayLength(str, dex_pc, /* is_string_length */ true);
+  HInstruction* replacement;
+  if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
+    // For String.isEmpty(), create the `HEqual` representing the `length == 0`.
+    invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+    HIntConstant* zero = GetGraph()->GetIntConstant(0);
+    HEqual* equal = new (GetGraph()->GetArena()) HEqual(length, zero, dex_pc);
+    replacement = equal;
+  } else {
+    DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringLength);
+    replacement = length;
+  }
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1719,6 +1880,13 @@
     case Intrinsics::kDoubleDoubleToLongBits:
       SimplifyFP2Int(instruction);
       break;
+    case Intrinsics::kStringCharAt:
+      SimplifyStringCharAt(instruction);
+      break;
+    case Intrinsics::kStringIsEmpty:
+    case Intrinsics::kStringLength:
+      SimplifyStringIsEmptyOrLength(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
@@ -1745,4 +1913,150 @@
   }
 }
 
+// Replace code looking like
+//    OP y, x, const1
+//    OP z, y, const2
+// with
+//    OP z, x, const3
+// where OP is both an associative and a commutative operation.
+bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsCommutative());
+
+  if (!Primitive::IsIntegralType(instruction->GetType())) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2;
+  HBinaryOperation* y;
+
+  if (instruction->InstructionTypeEquals(left) && right->IsConstant()) {
+    const2 = right->AsConstant();
+    y = left->AsBinaryOperation();
+  } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) {
+    const2 = left->AsConstant();
+    y = right->AsBinaryOperation();
+  } else {
+    // The node does not match the pattern.
+    return false;
+  }
+
+  // If `y` has more than one use, we do not perform the optimization
+  // because it might increase code size (e.g. if the new constant is
+  // no longer encodable as an immediate operand in the target ISA).
+  if (!y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // GetConstantRight() can return both left and right constants
+  // for commutative operations.
+  HConstant* const1 = y->GetConstantRight();
+  if (const1 == nullptr) {
+    return false;
+  }
+
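+  // Temporarily rewire the instruction to operate on (const1, const2) so that
+  // TryStaticEvaluation() folds them into const3, then rewire it to (x, const3).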
+  instruction->ReplaceInput(const1, 0);
+  instruction->ReplaceInput(const2, 1);
+  HConstant* const3 = instruction->TryStaticEvaluation();
+  DCHECK(const3 != nullptr);
+  instruction->ReplaceInput(y->GetLeastConstantLeft(), 0);
+  instruction->ReplaceInput(const3, 1);
+  RecordSimplification();
+  return true;
+}
+
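+// Returns `binop` as a binary operation if it is an HAdd or an HSub, null otherwise.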
+static HBinaryOperation* AsAddOrSub(HInstruction* binop) {
+  return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr;
+}
+
+// Helper function that performs addition statically, considering the result type.
+static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) {
+  // Use the Compute() method for consistency with TryStaticEvaluation().
+  if (type == Primitive::kPrimInt) {
+    return HAdd::Compute<int32_t>(x, y);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
+    return HAdd::Compute<int64_t>(x, y);
+  }
+}
+
+// Helper function that extracts the integer value from any HConstant subclass
+// and negates it when requested.
+static int64_t GetValue(HConstant* constant, bool is_negated) {
+  int64_t ret = Int64FromConstant(constant);
+  return is_negated ? -ret : ret;
+}
+
+// Replace code looking like
+//    OP1 y, x, const1
+//    OP2 z, y, const2
+// with
+//    OP3 z, x, const3
+// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB.
+bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName();
+
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntegralType(type)) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant();
+  if (const2 == nullptr) {
+    return false;
+  }
+
+  HBinaryOperation* y = (AsAddOrSub(left) != nullptr)
+      ? left->AsBinaryOperation()
+      : AsAddOrSub(right);
+  // If y has more than one use, we do not perform the optimization because
+  // it might increase code size (e.g. if the new constant is no longer
+  // encodable as an immediate operand in the target ISA).
+  if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  left = y->GetLeft();
+  HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant();
+  if (const1 == nullptr) {
+    return false;
+  }
+
+  HInstruction* x = (const1 == left) ? y->GetRight() : left;
+  // If both inputs are constants, let the constant folding pass deal with it.
+  if (x->IsConstant()) {
+    return false;
+  }
+
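+  // Determine the sign with which const1, const2 and x contribute to the overall
+  // expression, so that z can be rewritten as either `x + const3` or `const3 - x`
+  // with const3 = (+/-)const1 + (+/-)const2.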
+  bool is_const2_negated = (const2 == right) && instruction->IsSub();
+  int64_t const2_val = GetValue(const2, is_const2_negated);
+  bool is_y_negated = (y == right) && instruction->IsSub();
+  right = y->GetRight();
+  bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub());
+  int64_t const1_val = GetValue(const1, is_const1_negated);
+  bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub());
+  int64_t const3_val = ComputeAddition(type, const1_val, const2_val);
+  HBasicBlock* block = instruction->GetBlock();
+  HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val);
+  ArenaAllocator* arena = instruction->GetArena();
+  HInstruction* z;
+
+  if (is_x_negated) {
+    z = new (arena) HSub(type, const3, x, instruction->GetDexPc());
+  } else {
+    z = new (arena) HAdd(type, x, const3, instruction->GetDexPc());
+  }
+
+  block->ReplaceAndRemoveInstructionWith(instruction, z);
+  RecordSimplification();
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index cd026b8..495f3fd 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
+#include "code_generator.h"
 #include "instruction_simplifier_arm.h"
 #include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace arm {
@@ -38,6 +40,46 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  Primitive::Type type = instruction->GetType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 doesn't support ShiftedRegOffset mem address mode for these types
+    // to enable optimization.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+  Primitive::Type type = instruction->GetComponentType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
 
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 14c940e..782110c 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -38,6 +38,8 @@
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitOr(HOr* instruction) OVERRIDE;
   void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
@@ -46,7 +48,9 @@
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
   void Run() OVERRIDE {
     InstructionSimplifierArmVisitor visitor(graph_, stats_);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index e4a711e..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -19,6 +19,7 @@
 #include "common_arm64.h"
 #include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace arm64 {
@@ -27,57 +28,6 @@
 using helpers::HasShifterOperand;
 using helpers::ShifterOperandSupportsExtension;
 
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
-                                                                     HInstruction* array,
-                                                                     HInstruction* index,
-                                                                     int access_size) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return;
-  }
-  if (index->IsConstant() ||
-      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
-    // When the index is a constant all the addressing can be fitted in the
-    // memory access instruction, so do not split the access.
-    return;
-  }
-  if (access->IsArraySet() &&
-      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
-    // The access may require a runtime call or the original array pointer.
-    return;
-  }
-
-  // Proceed to extract the base address computation.
-  ArenaAllocator* arena = GetGraph()->GetArena();
-
-  HIntConstant* offset =
-      GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
-  HArm64IntermediateAddress* address =
-      new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
-  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
-  access->GetBlock()->InsertInstructionBefore(address, access);
-  access->ReplaceInput(address, 0);
-  // Both instructions must depend on GC to prevent any instruction that can
-  // trigger GC to be inserted between the two.
-  access->AddSideEffects(SideEffects::DependsOnGC());
-  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
-  // is an HArm64IntermediateAddress and generate appropriate code.
-  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
-  // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
-  // not bring any advantages yet.
-  // Also see the comments in
-  // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
-  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
-  RecordSimplification();
-}
-
 bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
                                                                    HInstruction* bitfield_op,
                                                                    bool do_merge) {
@@ -189,17 +139,24 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetType()));
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetComponentType()));
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index da26998..f71684e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,10 +35,6 @@
     }
   }
 
-  void TryExtractArrayAccessAddress(HInstruction* access,
-                                    HInstruction* array,
-                                    HInstruction* index,
-                                    int access_size);
   bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
   bool TryMergeIntoShifterOperand(HInstruction* use,
                                   HInstruction* bitfield_op,
@@ -86,8 +82,9 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm64", stats) {}
-
+    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+  static constexpr const char* kInstructionSimplifierArm64PassName
+      = "instruction_simplifier_arm64";
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dab1ebc..8f7778f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -226,4 +226,59 @@
   return false;
 }
 
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset) {
+  if (kEmitCompilerReadBarrier) {
+    // The read barrier instrumentation does not support the
+    // HIntermediateAddress instruction yet.
+    //
+    // TODO: Handle this case properly in the ARM64 and ARM code generators and
+    // re-enable this optimization; otherwise, remove this TODO.
+    // b/26601270
+    return false;
+  }
+  if (index->IsConstant() ||
+      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+    // When the index is a constant all the addressing can be fitted in the
+    // memory access instruction, so do not split the access.
+    return false;
+  }
+  if (access->IsArraySet() &&
+      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+    // The access may require a runtime call or the original array pointer.
+    return false;
+  }
+
+  // Proceed to extract the base address computation.
+  HGraph* graph = access->GetBlock()->GetGraph();
+  ArenaAllocator* arena = graph->GetArena();
+
+  HIntConstant* offset = graph->GetIntConstant(data_offset);
+  HIntermediateAddress* address =
+      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 0);
+  // Both instructions must depend on GC to prevent any instruction that can
+  // trigger GC to be inserted between the two.
+  access->AddSideEffects(SideEffects::DependsOnGC());
+  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+  // is an HIntermediateAddress and generate appropriate code.
+  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
+  // because these new instructions would not bring any advantages yet.
+  // Also see the comments in
+  // `InstructionCodeGeneratorARM::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM::VisitArraySet()`
+  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+  return true;
+}
+
+
 }  // namespace art
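
Reviewer note: the new TryExtractArrayAccessAddress helper above splits an array access into an intermediate base-address computation plus a scaled index. Below is a minimal, stand-alone C++ sketch of the idea (hypothetical names, not ART code; the real transformation operates on HIR nodes, not raw pointers):

    #include <cstdint>
    #include <cstddef>

    // Before: the full address (array + data_offset + index * element_size)
    // is recomputed for every access.
    int32_t LoadElementBefore(const char* array, size_t index, size_t data_offset) {
      return *reinterpret_cast<const int32_t*>(array + data_offset + index * sizeof(int32_t));
    }

    // After: `intermediate` plays the role of HIntermediateAddress. It can be
    // computed once and shared by several accesses, as long as no GC-triggering
    // instruction is scheduled in between (hence the DependsOnGC side effect
    // added to both instructions above).
    int32_t LoadElementAfter(const char* array, size_t index, size_t data_offset) {
      const char* intermediate = array + data_offset;
      return *reinterpret_cast<const int32_t*>(intermediate + index * sizeof(int32_t));
    }
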
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index b1fe8f4..56804f5 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -26,6 +26,11 @@
 // a negated bitwise instruction.
 bool TryMergeNegatedInput(HBinaryOperation* op);
 
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 5d4c4e2..418d59c 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -388,10 +388,8 @@
     case kIntrinsicGetCharsNoCheck:
       return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
-      // The inliner can handle these two cases - and this is the preferred approach
-      // since after inlining the call is no longer visible (as opposed to waiting
-      // until codegen to handle intrinsic).
-      return Intrinsics::kNone;
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 863dd1c..1a8eb58 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,8 +27,9 @@
 class CompilerDriver;
 class DexFile;
 
-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
 
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
@@ -161,7 +162,7 @@
 void Set##name() { SetBit(k##name); }                                 \
 bool Get##name() const { return IsBitSet(k##name); }                  \
 private:                                                              \
-static constexpr size_t k##name = bit + kNumberOfGenericOptimizations
+static constexpr size_t k##name = (bit) + kNumberOfGenericOptimizations
 
 class StringEqualsOptimizations : public IntrinsicOptimizations {
  public:
@@ -235,6 +236,9 @@
 UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
 UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
 UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, StringCharAt)           \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
+UNREACHABLE_INTRINSIC(Arch, StringLength)           \
 UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
 UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
 UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
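
Reviewer note: the `(bit)` parenthesization added in the intrinsics.h hunk above matters whenever the macro argument is an expression rather than a literal. A small, stand-alone illustration (hypothetical names, not ART code):

    #include <iostream>

    static constexpr int kBase = 4;

    #define BAD_OFFSET(bit) bit + kBase     // argument substituted verbatim
    #define GOOD_OFFSET(bit) (bit) + kBase  // argument kept as one unit

    int main() {
      bool cond = true;
      // BAD_OFFSET(cond ? 1 : 2) expands to `cond ? 1 : 2 + kBase`, which
      // parses as `cond ? 1 : (2 + kBase)` and therefore yields 1 instead of 5.
      std::cout << BAD_OFFSET(cond ? 1 : 2) << "\n";   // prints 1
      std::cout << GOOD_OFFSET(cond ? 1 : 2) << "\n";  // prints 5
      return 0;
    }
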
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 6c253ad..0bbc0e5 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,25 +41,98 @@
 
 using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    Register dest = locations->InAt(2).AsRegister<Register>();
+    Location dest_pos = locations->InAt(3);
+    Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+    Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+    Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+    Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+    __ Bind(GetEntryLabel());
+    // Compute the base destination address in `dst_curr_addr`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+    } else {
+      __ add(dst_curr_addr,
+             dest,
+             ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(dst_curr_addr, offset);
+    }
+
+    Label loop;
+    __ Bind(&loop);
+    __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+    __ MaybeUnpoisonHeapReference(tmp);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp = ReadBarrier::Mark(tmp);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(tmp, SP);
+    DCHECK_NE(tmp, LR);
+    DCHECK_NE(tmp, PC);
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK_NE(src_curr_addr, IP);
+    DCHECK_NE(dst_curr_addr, IP);
+    DCHECK_NE(src_stop_addr, IP);
+    DCHECK_NE(tmp, IP);
+    DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(tmp);
+    __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+    __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+    __ b(&loop, NE);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -524,8 +597,8 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
           if (is_volatile) {
             __ dmb(ISH);
           }
@@ -581,10 +654,11 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -919,9 +993,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -932,86 +1007,141 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
-  ArmAssembler* assembler = GetAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();  // String object pointer.
-  Register idx = locations->InAt(1).AsRegister<Register>();  // Index of character.
-  Register out = locations->Out().AsRegister<Register>();    // Result character.
-
-  Register temp = locations->GetTemp(0).AsRegister<Register>();
-  Register array_temp = locations->GetTemp(1).AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ ldr(temp, Address(obj, count_offset.Int32Value()));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ cmp(idx, ShifterOperand(temp));
-  __ b(slow_path->GetEntryLabel(), CS);
-
-  __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ ldrh(out, Address(array_temp, idx, LSL, 1));                 // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp0 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp1 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+
+  Label loop;
+  Label find_char_diff;
+  Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = locations->InAt(1).AsRegister<Register>();
-  __ cmp(argument, ShifterOperand(0));
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Take slow path and throw if input can be and is null.
+  SlowPathCode* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+  }
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value());
-  __ blx(LR);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ subs(out, str, ShifterOperand(arg));
+  __ b(&end, EQ);
+  // Load lengths of this and argument strings.
+  __ ldr(temp2, Address(str, count_offset));
+  __ ldr(temp1, Address(arg, count_offset));
+  // out = length diff.
+  __ subs(out, temp2, ShifterOperand(temp1));
+  // temp0 = min(len(str), len(arg)).
+  __ it(Condition::LT, kItElse);
+  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
+  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  // Shorter string is empty?
+  __ CompareAndBranchIfZero(temp0, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ sub(temp0, temp0, ShifterOperand(2));
+
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ subs(temp0, temp0, ShifterOperand(2));
+
+  __ b(&loop, GT);
+  __ b(&end);
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ eor(temp1, temp2, ShifterOperand(IP));
+  __ rbit(temp1, temp1);
+  __ clz(temp1, temp1);
+
+  // temp0 = number of 16-bit characters remaining to compare.
+  // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
+  // after the end of the shorter string data).
+
+  // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
+  // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+
+  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
+  // return length diff (out).
+  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
+  __ b(&end, LE);
+  // Extract the characters and calculate the difference.
+  __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Lsr(temp2, temp2, temp1);
+  __ Lsr(IP, IP, temp1);
+  __ movt(temp2, 0);
+  __ movt(IP, 0);
+  __ sub(out, IP, ShifterOperand(temp2));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
@@ -1055,17 +1185,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ CompareAndBranchIfZero(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ CompareAndBranchIfZero(arg, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ ldr(temp, Address(str, class_offset));
-  __ ldr(temp1, Address(arg, class_offset));
-  __ cmp(temp, ShifterOperand(temp1));
-  __ b(&return_false, NE);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ ldr(temp, Address(str, class_offset));
+    __ ldr(temp1, Address(arg, class_offset));
+    __ cmp(temp, ShifterOperand(temp1));
+    __ b(&return_false, NE);
+  }
 
   // Load lengths of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
@@ -1082,7 +1217,7 @@
 
   // Assertions that must hold in order to compare strings 2 characters at a time.
   DCHECK_ALIGNED(value_offset, 4);
-  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   __ LoadImmediate(temp1, value_offset);
 
@@ -1115,16 +1250,16 @@
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
         std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1134,23 +1269,25 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = locations->InAt(1).AsRegister<Register>();
-    __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
-    __ cmp(char_reg, ShifterOperand(tmp_reg));
+    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ cmp(char_reg,
+           ShifterOperand(static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1));
     slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
     codegen->AddSlowPath(slow_path);
-    __ b(slow_path->GetEntryLabel(), HI);
+    __ b(slow_path->GetEntryLabel(), HS);
   }
 
   if (start_at_zero) {
+    Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
     DCHECK_EQ(tmp_reg, R2);
     // Start-index = 0.
     __ LoadImmediate(tmp_reg, 0);
   }
 
   __ LoadFromOffset(kLoadWord, LR, TR,
-                    QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pIndexOf).Int32Value());
   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   __ blx(LR);
 
@@ -1161,7 +1298,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1170,7 +1307,7 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 
-  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  // Need to send start-index=0.
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
 }
 
@@ -1181,7 +1318,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1190,9 +1327,6 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   locations->SetOut(Location::RegisterLocation(R0));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1202,7 +1336,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1222,8 +1356,10 @@
   codegen_->AddSlowPath(slow_path);
   __ b(slow_path->GetEntryLabel(), EQ);
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
+  __ LoadFromOffset(kLoadWord,
+                    LR,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromBytes).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ blx(LR);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1232,7 +1368,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1250,8 +1386,10 @@
   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   //
   // all include a null check on `data` before calling that method.
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
+  __ LoadFromOffset(kLoadWord,
+                    LR,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ blx(LR);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1259,7 +1397,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1277,7 +1415,7 @@
   __ b(slow_path->GetEntryLabel(), EQ);
 
   __ LoadFromOffset(kLoadWord,
-      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
+      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromString).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ blx(LR);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1285,6 +1423,12 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   LocationSummary* locations = invoke->GetLocations();
   if (locations == nullptr) {
@@ -1304,6 +1448,13 @@
   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
     locations->SetInAt(4, Location::RequiresRegister());
   }
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+    // temporary register from the register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 static void CheckPosition(ArmAssembler* assembler,
@@ -1311,7 +1462,6 @@
                           Register input,
                           Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
                           Register temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -1332,8 +1482,8 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ LoadFromOffset(kLoadWord, input_len, input, length_offset);
-      __ subs(temp, input_len, ShifterOperand(pos_const));
+      __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+      __ subs(temp, temp, ShifterOperand(pos_const));
       __ b(slow_path->GetEntryLabel(), LT);
 
       // Check that (length(input) - pos) >= length.
@@ -1369,11 +1519,11 @@
   }
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathARM
-// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1381,18 +1531,22 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   Register src = locations->InAt(0).AsRegister<Register>();
   Location src_pos = locations->InAt(1);
   Register dest = locations->InAt(2).AsRegister<Register>();
   Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
-  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
-  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
-  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+  Location temp1_loc = locations->GetTemp(0);
+  Register temp1 = temp1_loc.AsRegister<Register>();
+  Location temp2_loc = locations->GetTemp(1);
+  Register temp2 = temp2_loc.AsRegister<Register>();
+  Location temp3_loc = locations->GetTemp(2);
+  Register temp3 = temp3_loc.AsRegister<Register>();
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
 
   Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
@@ -1408,7 +1562,7 @@
         DCHECK_GE(src_pos_constant, dest_pos_constant);
       } else if (src_pos_constant < dest_pos_constant) {
         __ cmp(src, ShifterOperand(dest));
-        __ b(slow_path->GetEntryLabel(), EQ);
+        __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
       }
 
       // Checked when building locations.
@@ -1420,7 +1574,7 @@
         __ b(&conditions_on_positions_validated, NE);
       }
       __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
-      __ b(slow_path->GetEntryLabel(), GT);
+      __ b(intrinsic_slow_path->GetEntryLabel(), GT);
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
@@ -1433,19 +1587,19 @@
     } else {
       __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
     }
-    __ b(slow_path->GetEntryLabel(), LT);
+    __ b(intrinsic_slow_path->GetEntryLabel(), LT);
   }
 
   __ Bind(&conditions_on_positions_validated);
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
-    __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
-    __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
   }
 
   // If the length is negative, bail out.
@@ -1454,7 +1608,7 @@
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
     __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
-    __ b(slow_path->GetEntryLabel(), LT);
+    __ b(intrinsic_slow_path->GetEntryLabel(), LT);
   }
 
   // Validity checks: source.
@@ -1462,9 +1616,8 @@
                 src_pos,
                 src,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1472,9 +1625,8 @@
                 dest_pos,
                 dest,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1482,113 +1634,287 @@
     // type of the destination array. We do two checks: the classes are the same,
     // or the destination is Object[]. If none of these checks succeed, we go to the
     // slow path.
-    __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
-    __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
-    bool did_unpoison = false;
-    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-        !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoison them
-      // both to make the identity check valid.
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ MaybeUnpoisonHeapReference(temp2);
-      did_unpoison = true;
-    }
 
-    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
-      // Bail out if the destination is not a non primitive array.
-      // /* HeapReference<Class> */ temp3 = temp1->component_type_
-      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
-      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(temp3);
-      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
-    }
-
-    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
-      // Bail out if the source is not a non primitive array.
-      // /* HeapReference<Class> */ temp3 = temp2->component_type_
-      __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
-      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(temp3);
-      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
-    }
-
-    __ cmp(temp1, ShifterOperand(temp2));
-
-    if (optimizations.GetDestinationIsTypedObjectArray()) {
-      Label do_copy;
-      __ b(&do_copy, EQ);
-      if (!did_unpoison) {
-        __ MaybeUnpoisonHeapReference(temp1);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
       }
-      // /* HeapReference<Class> */ temp1 = temp1->component_type_
-      __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-      __ MaybeUnpoisonHeapReference(temp1);
-      // /* HeapReference<Class> */ temp1 = temp1->super_class_
-      __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-      // No need to unpoison the result, we're comparing against null.
-      __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-      __ Bind(&do_copy);
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ cmp(temp1, ShifterOperand(temp2));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        Label do_copy;
+        __ b(&do_copy, EQ);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+      }
     } else {
-      __ b(slow_path->GetEntryLabel(), NE);
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      __ cmp(temp1, ShifterOperand(temp2));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        Label do_copy;
+        __ b(&do_copy, EQ);
+        if (!did_unpoison) {
+          __ MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        __ MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+        // No need to unpoison the result, we're comparing against null.
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+      }
     }
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
-    // /* HeapReference<Class> */ temp1 = src->klass_
-    __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
-    __ MaybeUnpoisonHeapReference(temp1);
-    // /* HeapReference<Class> */ temp3 = temp1->component_type_
-    __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
-    __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
-    __ MaybeUnpoisonHeapReference(temp3);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+    }
+    // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
     __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-    __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }
 
-  // Compute base source address, base destination address, and end source address.
-
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  // Compute the base source address in `temp1`.
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
     __ AddConstant(temp1, src, element_size * constant + offset);
   } else {
-    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
     __ AddConstant(temp1, offset);
   }
 
-  if (dest_pos.IsConstant()) {
-    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ AddConstant(temp2, dest, element_size * constant + offset);
-  } else {
-    __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
-    __ AddConstant(temp2, offset);
-  }
-
+  // Compute the end source address in `temp3`.
   if (length.IsConstant()) {
     int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
     __ AddConstant(temp3, temp1, element_size * constant);
   } else {
-    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
   }
 
-  // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
-  Label loop, done;
-  __ cmp(temp1, ShifterOperand(temp3));
-  __ b(&done, EQ);
-  __ Bind(&loop);
-  __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-  __ str(IP, Address(temp2, element_size, Address::PostIndex));
-  __ cmp(temp1, ShifterOperand(temp3));
-  __ b(&loop, NE);
-  __ Bind(&done);
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // The base destination address is computed later, as `temp2` is
+    // used for intermediate computations.
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    Label loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&done, EQ);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `src` is unchanged by this operation, but its value now depends
+    // on `temp2`.
+    __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // Carry flag is the last bit shifted out by LSRS.
+    __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+    // Fast-path copy.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(temp2, dest, element_size * constant + offset);
+    } else {
+      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+    __ str(IP, Address(temp2, element_size, Address::PostIndex));
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&loop, NE);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ AddConstant(temp2, dest, element_size * constant + offset);
+    } else {
+      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+      __ AddConstant(temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    Label loop, done;
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&done, EQ);
+    __ Bind(&loop);
+    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+    __ str(IP, Address(temp2, element_size, Address::PostIndex));
+    __ cmp(temp1, ShifterOperand(temp3));
+    __ b(&loop, NE);
+    __ Bind(&done);
+  }
 
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(temp1,
@@ -1597,7 +1923,7 @@
                        Register(kNoRegister),
                        /* value_can_be_null */ false);
 
-  __ Bind(slow_path->GetExitLabel());
+  __ Bind(intrinsic_slow_path->GetExitLabel());
 }
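
Reviewer note: a hedged C++ sketch (hypothetical names, not ART code) of the copy logic described in the pseudo-code comment inside VisitSystemArrayCopy above: when Baker read barriers are in use and the source object is gray, every element goes through the marking entry point; otherwise a plain raw copy is emitted.

    #include <cstdint>

    using Reference = uint32_t;  // stand-in for a compressed heap reference

    // Placeholder for the ReadBarrierMarkRegX runtime entry point.
    Reference Mark(Reference ref) { return ref; }

    void CopyReferences(const Reference* src_ptr, const Reference* end_ptr,
                        Reference* dest_ptr, bool src_is_gray) {
      if (src_ptr == end_ptr) {
        return;  // length == 0: do not enter the copy loop
      }
      if (src_is_gray) {
        // Slow path: mark each reference before storing it.
        do {
          *dest_ptr++ = Mark(*src_ptr++);
        } while (src_ptr != end_ptr);
      } else {
        // Fast path: raw copy; no poisoning/unpoisoning is needed here.
        do {
          *dest_ptr++ = *src_ptr++;
        } while (src_ptr != end_ptr);
      }
    }
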
 
 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1615,7 +1941,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
@@ -1642,7 +1968,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
@@ -1668,7 +1994,7 @@
   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
 
-  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmWordSize>(entry).Int32Value());
+  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
   // Native code uses the soft float ABI.
   __ vmovrrd(calling_convention.GetRegisterAt(0),
              calling_convention.GetRegisterAt(1),
@@ -1694,7 +2020,7 @@
   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
 
-  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmWordSize>(entry).Int32Value());
+  __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
   // Native code uses the soft float ABI.
   __ vmovrrd(calling_convention.GetRegisterAt(0),
              calling_convention.GetRegisterAt(1),
@@ -1929,6 +2255,51 @@
   __ revsh(out, in);
 }
 
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+  Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
+  SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+  DRegister tmp_d = FromLowSToD(tmp_s);
+  Register  out_r = locations->Out().AsRegister<Register>();
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a core
+  // register to the upper or lower half of a VFP D-register incurs extra latency compared to
+  // transferring to the full D-register. That's why, for the integer bit count, we use
+  // 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
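+  // Note: in the int case both halves of the D register hold the same value; each 32-bit lane
+  // then accumulates the bit count of its own copy, and only the low lane (tmp_s) is read back.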
+  __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
+  __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
+  __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
+  if (is_long) {
+    __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
+  }
+  __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
 void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
@@ -1939,7 +2310,7 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  locations->AddTemp(Location::RequiresRegister());
+  // Temporary registers to store lengths of strings and for calculations.
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
@@ -1967,33 +2338,108 @@
   Register dstObj = locations->InAt(3).AsRegister<Register>();
   Register dstBegin = locations->InAt(4).AsRegister<Register>();
 
-  Register src_ptr = locations->GetTemp(0).AsRegister<Register>();
-  Register src_ptr_end = locations->GetTemp(1).AsRegister<Register>();
+  Register num_chr = locations->GetTemp(0).AsRegister<Register>();
+  Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
-  Register tmp = locations->GetTemp(3).AsRegister<Register>();
 
   // src range to copy.
   __ add(src_ptr, srcObj, ShifterOperand(value_offset));
-  __ add(src_ptr_end, src_ptr, ShifterOperand(srcEnd, LSL, 1));
   __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
 
   // dst to be copied.
   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
 
+  __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
+
   // Do the copy.
-  Label loop, done;
-  __ Bind(&loop);
-  __ cmp(src_ptr, ShifterOperand(src_ptr_end));
+  Label loop, remainder, done;
+
+  // Early out for valid zero-length retrievals.
   __ b(&done, EQ);
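+  // (EQ here uses the flags set by the SUBS that computed num_chr above.)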
-  __ ldrh(tmp, Address(src_ptr, char_size, Address::PostIndex));
-  __ strh(tmp, Address(dst_ptr, char_size, Address::PostIndex));
-  __ b(&loop);
+
+  // Subtract into IP so that num_chr does not need repairing on the < 4 character path.
+  __ subs(IP, num_chr, ShifterOperand(4));
+  __ b(&remainder, LT);
+
+  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
+  __ mov(num_chr, ShifterOperand(IP));
+
+  // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
+  // (LDRD/STRD fault on unaligned addresses, and it's not worth inlining extra alignment code
+  // everywhere this intrinsic applies, so plain word loads and stores are used instead.)
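+  // Each iteration below copies four chars with two 32-bit transfers: the word at offset 4
+  // (chars 2-3) first, then the word at offset 0 (chars 0-1), post-indexing both pointers by
+  // 8 bytes.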
+  __ Bind(&loop);
+  __ ldr(IP, Address(src_ptr, char_size * 2));
+  __ subs(num_chr, num_chr, ShifterOperand(4));
+  __ str(IP, Address(dst_ptr, char_size * 2));
+  __ ldr(IP, Address(src_ptr, char_size * 4, Address::PostIndex));
+  __ str(IP, Address(dst_ptr, char_size * 4, Address::PostIndex));
+  __ b(&loop, GE);
+
+  __ adds(num_chr, num_chr, ShifterOperand(4));
+  __ b(&done, EQ);
+
+  // Loop for the < 4 character case and for remainder handling: loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ ldrh(IP, Address(src_ptr, char_size, Address::PostIndex));
+  __ subs(num_chr, num_chr, ShifterOperand(1));
+  __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+  __ b(&remainder, GT);
+
   __ Bind(&done);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
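+  // The bit pattern of float positive infinity is 0x7f800000, so 'infinity' is 0xff000000 here,
+  // which fits a single ARM modified immediate (0xff rotated right by 8).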
+
+  __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double-precision positive infinity, split into
+  // two constants that are each encodable as an immediate operand.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
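+  // ORed together they give 0x7ff00000, the high word of the bit pattern of double positive
+  // infinity (0x7ff0000000000000); the combined value is not encodable as a single ARM modified
+  // immediate, which is why two EORs are used below.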
+
+  static_assert((infinity_high | infinity_high2) == static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double precision positive infinity.");
+  __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ eor(out, out, ShifterOperand(infinity_high));
+  __ eor(out, out, ShifterOperand(infinity_high2));
+  // We don't care about the sign bit, so shift left.
+  __ orr(out, IP, ShifterOperand(out, LSL, 1));
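+  // out is now zero iff the low word is zero and the high word matches the infinity pattern,
+  // ignoring the sign bit (which the left shift by one discards).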
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
@@ -2008,8 +2454,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 696fa52..91374b3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -26,12 +26,15 @@
 #include "mirror/string.h"
 #include "thread.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/arm64/constants_arm64.h"
 
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
-using namespace vixl;   // NOLINT(build/namespaces)
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 
@@ -47,6 +50,7 @@
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 using helpers::InputRegisterAt;
+using helpers::OutputRegister;
 
 namespace {
 
@@ -56,15 +60,15 @@
 
 }  // namespace
 
-vixl::MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
-  return codegen_->GetAssembler()->vixl_masm_;
+MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
+  return codegen_->GetVIXLAssembler();
 }
 
 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
   return codegen_->GetGraph()->GetArena();
 }
 
-#define __ codegen->GetAssembler()->vixl_masm_->
+#define __ codegen->GetVIXLAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
                                    Primitive::Type type,
@@ -140,6 +144,73 @@
   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
 };
 
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+      : SlowPathCodeARM64(instruction), tmp_(tmp) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+    Register tmp_reg = WRegisterFrom(tmp_);
+
+    __ Bind(GetEntryLabel());
+    vixl::aarch64::Label slow_copy_loop;
+    __ Bind(&slow_copy_loop);
+    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp_reg = ReadBarrier::Mark(tmp_reg);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(tmp_.reg(), LR);
+    DCHECK_NE(tmp_.reg(), WSP);
+    DCHECK_NE(tmp_.reg(), WZR);
+    // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+    DCHECK_NE(tmp_.reg(), IP0);
+    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+    // This runtime call does not require a stack map.
+    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(&slow_copy_loop, ne);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+  Location tmp_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
 #undef __
 
 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -148,19 +219,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -182,14 +240,14 @@
   locations->SetOut(Location::RequiresFpuRegister());
 }
 
-static void MoveFPToInt(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
 }
 
-static void MoveIntToFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
@@ -234,7 +292,7 @@
 
 static void GenReverseBytes(LocationSummary* locations,
                             Primitive::Type type,
-                            vixl::MacroAssembler* masm) {
+                            MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -288,7 +346,7 @@
 
 static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                     Primitive::Type type,
-                                    vixl::MacroAssembler* masm) {
+                                    MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -315,7 +373,7 @@
 
 static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                      Primitive::Type type,
-                                     vixl::MacroAssembler* masm) {
+                                     MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -343,7 +401,7 @@
 
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
-                       vixl::MacroAssembler* masm) {
+                       MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -368,7 +426,7 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenBitCount(HInvoke* instr, Primitive::Type type, vixl::MacroAssembler* masm) {
+static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
   DCHECK(Primitive::IsIntOrLongType(type)) << type;
   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
@@ -409,7 +467,7 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -445,7 +503,7 @@
 
 static void GenAbsInteger(LocationSummary* locations,
                           bool is64bit,
-                          vixl::MacroAssembler* masm) {
+                          MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location output = locations->Out();
 
@@ -475,7 +533,7 @@
 static void GenMinMaxFP(LocationSummary* locations,
                         bool is_min,
                         bool is_double,
-                        vixl::MacroAssembler* masm) {
+                        MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -535,7 +593,7 @@
 static void GenMinMax(LocationSummary* locations,
                       bool is_min,
                       bool is_long,
-                      vixl::MacroAssembler* masm) {
+                      MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -586,7 +644,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -596,7 +654,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -606,7 +664,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -616,58 +674,70 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
-static void CreateFPToIntPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
   locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
-static void GenMathRound(LocationSummary* locations,
-                         bool is_double,
-                         vixl::MacroAssembler* masm) {
-  FPRegister in_reg = is_double ?
-      DRegisterFrom(locations->InAt(0)) : SRegisterFrom(locations->InAt(0));
-  Register out_reg = is_double ?
-      XRegisterFrom(locations->Out()) : WRegisterFrom(locations->Out());
-  UseScratchRegisterScope temps(masm);
-  FPRegister temp1_reg = temps.AcquireSameSizeAs(in_reg);
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
+  // Java 8 API definition for Math.round():
+  // Return the closest long or int to the argument, with ties rounding to positive infinity.
+  //
+  // No single ARMv8 instruction implements the definition above. We use FCVTAS because it has the
+  // closest semantics: it rounds to the nearest integer, with ties away from zero. For most inputs
+  // (positive values, zero, or NaN) that instruction is enough; only a negative tie (x.5) needs a
+  // little extra handling after FCVTAS.
+  //
+  // FCVTPS was not chosen because, although it rounds toward positive infinity, it does not round
+  // to nearest. For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2. With that instruction, most
+  // inputs would need extra handling code.
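+  //
+  // A worked example of the negative-tie fix-up below (illustrative only): Math.round(-2.5) must
+  // return -2. FCVTAS(-2.5) = -3 and FRINTA(-2.5) = -3.0, so in - FRINTA(in) = 0.5; the compare
+  // against 0.5 sets eq and CINC corrects the result to -2. For a non-tie such as -2.4 the
+  // fraction is -0.4, so the FCVTAS result is kept unchanged.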
+  LocationSummary* l = invoke->GetLocations();
+  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
+  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
+  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
+  vixl::aarch64::Label done;
 
-  // 0.5 can be encoded as an immediate, so use fmov.
-  if (is_double) {
-    __ Fmov(temp1_reg, static_cast<double>(0.5));
-  } else {
-    __ Fmov(temp1_reg, static_cast<float>(0.5));
-  }
-  __ Fadd(temp1_reg, in_reg, temp1_reg);
-  __ Fcvtms(out_reg, temp1_reg);
+  // Round to nearest integer, ties away from zero.
+  __ Fcvtas(out_reg, in_reg);
+
+  // For positive values, zero or NaN inputs, rounding is done.
+  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
+
+  // Handle the input < 0 cases.
+  // If the input is negative but not a tie, the FCVTAS result (round to nearest) is already valid.
+  // If the input is a negative tie, out_reg += 1.
+  __ Frinta(tmp_fp, in_reg);
+  __ Fsub(tmp_fp, in_reg, tmp_fp);
+  __ Fcmp(tmp_fp, 0.5);
+  __ Cinc(out_reg, out_reg, eq);
+
+  __ Bind(&done);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -675,7 +745,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -685,7 +755,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -695,7 +765,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -705,7 +775,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -723,7 +793,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -733,7 +803,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -743,7 +813,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -753,7 +823,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -767,7 +837,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
   codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
-                 MemOperand(tr, Thread::PeerOffset<8>().Int32Value()));
+                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
 }
 
 static void GenUnsafeGet(HInvoke* invoke,
@@ -778,7 +848,7 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
@@ -790,8 +860,15 @@
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
     UseScratchRegisterScope temps(masm);
     Register temp = temps.AcquireW();
-    codegen->GenerateArrayLoadWithBakerReadBarrier(
-        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                       trg_loc,
+                                                       base,
+                                                       /* offset */ 0U,
+                                                       /* index */ offset_loc,
+                                                       /* scale_factor */ 0U,
+                                                       temp,
+                                                       /* needs_null_check */ false,
+                                                       is_volatile);
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
@@ -820,7 +897,8 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -904,7 +982,7 @@
                          bool is_volatile,
                          bool is_ordered,
                          CodeGeneratorARM64* codegen) {
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
@@ -1023,7 +1101,7 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
 
@@ -1062,7 +1140,7 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  vixl::Label loop_head, exit_loop;
+  vixl::aarch64::Label loop_head, exit_loop;
   __ Bind(&loop_head);
   // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
   // the reference stored in the object before attempting the CAS,
@@ -1101,9 +1179,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -1118,86 +1197,134 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // In case we need to go in the slow path, we can't have the output be the same
-  // as the input: the current liveness analysis considers the input to be live
-  // at the point of the call.
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = WRegisterFrom(locations->InAt(0));  // String object pointer.
-  Register idx = WRegisterFrom(locations->InAt(1));  // Index of character.
-  Register out = WRegisterFrom(locations->Out());    // Result character.
-
-  UseScratchRegisterScope temps(masm);
-  Register temp = temps.AcquireW();
-  Register array_temp = temps.AcquireW();            // We can trade this for worse scheduling.
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ Ldr(temp, HeapOperand(obj, count_offset));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ Cmp(idx, temp);
-  __ B(hs, slow_path->GetEntryLabel());
-
-  __ Add(array_temp, obj, Operand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1));  // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = InputRegisterAt(invoke, 0);
+  Register arg = InputRegisterAt(invoke, 1);
+  DCHECK(str.IsW());
+  DCHECK(arg.IsW());
+  Register out = OutputRegister(invoke);
+
+  Register temp0 = WRegisterFrom(locations->GetTemp(0));
+  Register temp1 = WRegisterFrom(locations->GetTemp(1));
+  Register temp2 = WRegisterFrom(locations->GetTemp(2));
+
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label find_char_diff;
+  vixl::aarch64::Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = WRegisterFrom(locations->InAt(1));
-  __ Cmp(argument, 0);
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Take the slow path and throw if the argument can be null and actually is null.
+  SlowPathCodeARM64* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
 
-  __ Ldr(
-      lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pStringCompareTo).Int32Value()));
-  __ Blr(lr);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(&end, eq);
+  // Load lengths of this and argument strings.
+  __ Ldr(temp0, HeapOperand(str, count_offset));
+  __ Ldr(temp1, HeapOperand(arg, count_offset));
+  // Return zero if both strings are empty.
+  __ Orr(out, temp0, temp1);
+  __ Cbz(out, &end);
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp2 = min(len(str), len(arg)).
+  __ Csel(temp2, temp1, temp0, ge);
+  // Shorter string is empty?
+  __ Cbz(temp2, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  UseScratchRegisterScope scratch_scope(masm);
+  Register temp4 = scratch_scope.AcquireX();
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Promote temp0 to an X reg, ready for LDR.
+  temp0 = temp0.X();
+
+  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
+  __ Bind(&loop);
+  __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+  __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
+  __ Cmp(temp4, temp0);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 4);
+  __ Subs(temp2, temp2, 4);
+  __ B(gt, &loop);
+  __ B(&end);
+
+  // Promote temp1 to an X reg, ready for EOR.
+  temp1 = temp1.X();
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp0, temp4);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
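+  // RBIT + CLZ yields the bit index (from bit 0) of the lowest bit that differs; with the
+  // little-endian loads above, that is the first differing character, and LSR #4 below converts
+  // the bit index into a character index.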
+  // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+  // the difference occurs outside the remaining string data, so just return length diff (out).
+  __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
+  __ B(le, &end);
+  // Extract the characters and calculate the difference.
+  __ Bic(temp1, temp1, 0xf);
+  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp4, temp4, temp1);
+  __ And(temp4, temp4, 0xffff);
+  __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
@@ -1214,7 +1341,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register str = WRegisterFrom(locations->InAt(0));
@@ -1226,10 +1353,10 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::Label loop;
-  vixl::Label end;
-  vixl::Label return_true;
-  vixl::Label return_false;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label end;
+  vixl::aarch64::Label return_true;
+  vixl::aarch64::Label return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1239,21 +1366,26 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ Cbz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
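+  // These flags are typically set by the instruction simplifier when it can prove the argument is
+  // known to be non-null or known to be a String.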
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Cmp(str, arg);
   __ B(&return_true, eq);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Ldr(temp, MemOperand(str.X(), class_offset));
-  __ Ldr(temp1, MemOperand(arg.X(), class_offset));
-  __ Cmp(temp, temp1);
-  __ B(&return_false, ne);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str.X(), class_offset));
+    __ Ldr(temp1, MemOperand(arg.X(), class_offset));
+    __ Cmp(temp, temp1);
+    __ B(&return_false, ne);
+  }
 
   // Load lengths of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
@@ -1297,21 +1429,21 @@
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
-                                       vixl::MacroAssembler* masm,
+                                       MacroAssembler* masm,
                                        CodeGeneratorARM64* codegen,
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeARM64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
@@ -1320,21 +1452,21 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = WRegisterFrom(locations->InAt(1));
-    __ Mov(tmp_reg, 0xFFFF);
-    __ Cmp(char_reg, Operand(tmp_reg));
+    __ Tst(char_reg, 0xFFFF0000);
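+    // (A single TST against 0xFFFF0000 checks for a code point above 0xFFFF without the temporary
+    //  register that the previous MOV/CMP sequence required.)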
     slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
     codegen->AddSlowPath(slow_path);
-    __ B(hi, slow_path->GetEntryLabel());
+    __ B(ne, slow_path->GetEntryLabel());
   }
 
   if (start_at_zero) {
     // Start-index = 0.
+    Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
     __ Mov(tmp_reg, 0);
   }
 
-  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pIndexOf).Int32Value()));
   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   __ Blr(lr);
 
@@ -1345,7 +1477,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1354,7 +1486,7 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
 
-  // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+  // Need to send start_index=0.
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
 }
 
@@ -1365,7 +1497,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1374,9 +1506,6 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1386,7 +1515,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1397,7 +1526,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register byte_array = WRegisterFrom(locations->InAt(0));
@@ -1407,7 +1536,8 @@
   __ B(eq, slow_path->GetEntryLabel());
 
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromBytes).Int32Value()));
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ Blr(lr);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1416,7 +1546,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1426,7 +1556,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
 
   // No need to emit code checking whether `locations->InAt(2)` is a null
   // pointer, as callers of the native method
@@ -1435,7 +1565,8 @@
   //
   // all include a null check on `data` before calling that method.
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromChars).Int32Value()));
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ Blr(lr);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1443,7 +1574,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1451,7 +1582,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register string_to_copy = WRegisterFrom(locations->InAt(0));
@@ -1461,7 +1592,8 @@
   __ B(eq, slow_path->GetEntryLabel());
 
   __ Ldr(lr,
-      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
+      MemOperand(tr,
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromString).Int32Value()));
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ Blr(lr);
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1474,7 +1606,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
@@ -1489,7 +1621,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
@@ -1499,10 +1631,11 @@
 }
 
 static void GenFPToFPCall(HInvoke* invoke,
-                          vixl::MacroAssembler* masm,
+                          MacroAssembler* masm,
                           CodeGeneratorARM64* codegen,
                           QuickEntrypointEnum entry) {
-  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
+  __ Ldr(lr, MemOperand(tr,
+                        GetThreadOffset<kArm64PointerSize>(entry).Int32Value()));
   __ Blr(lr);
   codegen->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -1655,10 +1788,11 @@
 
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -1680,29 +1814,57 @@
   Register dstBegin = XRegisterFrom(locations->InAt(4));
 
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
-  Register src_ptr_end = XRegisterFrom(locations->GetTemp(1));
+  Register num_chr = XRegisterFrom(locations->GetTemp(1));
+  Register tmp1 = XRegisterFrom(locations->GetTemp(2));
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
-  Register tmp = temps.AcquireW();
+  Register tmp2 = temps.AcquireX();
 
-  // src range to copy.
+  // src address to copy from.
   __ Add(src_ptr, srcObj, Operand(value_offset));
-  __ Add(src_ptr_end, src_ptr, Operand(srcEnd, LSL, 1));
   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
-  // dst to be copied.
+  // dst address to start copying to.
   __ Add(dst_ptr, dstObj, Operand(data_offset));
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
 
+  __ Sub(num_chr, srcEnd, srcBegin);
+
   // Do the copy.
-  vixl::Label loop, done;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label done;
+  vixl::aarch64::Label remainder;
+
+  // Early out for valid zero-length retrievals.
+  __ Cbz(num_chr, &done);
+
+  // Subtract into tmp1 so that num_chr does not need repairing on the < 8 character path.
+  __ Subs(tmp1, num_chr, 8);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
+  __ Mov(num_chr, tmp1);
+
+  // The main loop, used for longer fetches, loads and stores 8x16-bit characters at a time.
+  // (Unaligned addresses are acceptable here, so it is not worth inlining extra alignment code.)
   __ Bind(&loop);
-  __ Cmp(src_ptr, src_ptr_end);
-  __ B(&done, eq);
-  __ Ldrh(tmp, MemOperand(src_ptr, char_size, vixl::PostIndex));
-  __ Strh(tmp, MemOperand(dst_ptr, char_size, vixl::PostIndex));
-  __ B(&loop);
+  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
+  __ Subs(num_chr, num_chr, 8);
+  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 8);
+  __ B(eq, &done);
+
+  // Loop for the < 8 character case and for remainder handling: loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
+  __ B(gt, &remainder);
+
   __ Bind(&done);
 }
 
@@ -1714,7 +1876,7 @@
                                                uint32_t at,
                                                HInstruction* input) {
   HIntConstant* const_input = input->AsIntConstant();
-  if (const_input != nullptr && !vixl::Assembler::IsImmAddSub(const_input->GetValue())) {
+  if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
     locations->SetInAt(at, Location::RequiresRegister());
   } else {
     locations->SetInAt(at, Location::RegisterOrConstant(input));
@@ -1761,12 +1923,11 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
-static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
+static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
                                          const Location& pos,
                                          const Register& input,
                                          const Location& length,
                                          SlowPathCodeARM64* slow_path,
-                                         const Register& input_len,
                                          const Register& temp,
                                          bool length_is_input_length = false) {
   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
@@ -1781,8 +1942,8 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ Ldr(input_len, MemOperand(input, length_offset));
-      __ Subs(temp, input_len, pos_const);
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);
       __ B(slow_path->GetEntryLabel(), lt);
 
       // Check that (length(input) - pos) >= length.
@@ -1795,7 +1956,7 @@
   } else {
     // Check that pos >= 0.
     Register pos_reg = WRegisterFrom(pos);
-    __ Tbnz(pos_reg, pos_reg.size() - 1, slow_path->GetEntryLabel());
+    __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
 
     // Check that pos <= length(input) && (length(input) - pos) >= length.
     __ Ldr(temp, MemOperand(input, length_offset));
@@ -1808,7 +1969,7 @@
 
 // Compute base source address, base destination address, and end source address
 // for System.arraycopy* intrinsics.
-static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm,
+static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
                                         Primitive::Type type,
                                         const Register& src,
                                         const Location& src_pos,
@@ -1849,7 +2010,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
   Register src = XRegisterFrom(locations->InAt(0));
   Location src_pos = locations->InAt(1);
@@ -1893,7 +2054,6 @@
                                length,
                                slow_path,
                                src_curr_addr,
-                               dst_curr_addr,
                                false);
 
   CheckSystemArrayCopyPosition(masm,
@@ -1902,7 +2062,6 @@
                                length,
                                slow_path,
                                src_curr_addr,
-                               dst_curr_addr,
                                false);
 
   src_curr_addr = src_curr_addr.X();
@@ -1924,12 +2083,12 @@
   const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   UseScratchRegisterScope temps(masm);
   Register tmp = temps.AcquireW();
-  vixl::Label loop, done;
+  vixl::aarch64::Label loop, done;
   __ Bind(&loop);
   __ Cmp(src_curr_addr, src_stop_addr);
   __ B(&done, eq);
-  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, vixl::PostIndex));
-  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, vixl::PostIndex));
+  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
   __ B(&loop);
   __ Bind(&done);
 
@@ -1943,6 +2102,12 @@
 // We want to use two temporary registers in order to reduce the register pressure in arm64.
 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
   // Check to see if we have known failures that will cause us to have to bail out
   // to the runtime, and just generate the runtime call directly.
   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
@@ -1992,16 +2157,29 @@
 
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP0, obtained from the VIXL scratch register
+    // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+    // (because that register is clobbered by ReadBarrierMarkRegX
+    // entry points). Get an extra temporary register from the
+    // register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   Register src = XRegisterFrom(locations->InAt(0));
   Location src_pos = locations->InAt(1);
@@ -2009,12 +2187,14 @@
   Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
+  Location temp1_loc = LocationFrom(temp1);
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
+  Location temp2_loc = LocationFrom(temp2);
 
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
 
-  vixl::Label conditions_on_positions_validated;
+  vixl::aarch64::Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
   // If source and destination are the same, we go to slow path if we need to do
@@ -2028,7 +2208,7 @@
         DCHECK_GE(src_pos_constant, dest_pos_constant);
       } else if (src_pos_constant < dest_pos_constant) {
         __ Cmp(src, dest);
-        __ B(slow_path->GetEntryLabel(), eq);
+        __ B(intrinsic_slow_path->GetEntryLabel(), eq);
       }
       // Checked when building locations.
       DCHECK(!optimizations.GetDestinationIsSource()
@@ -2039,7 +2219,7 @@
         __ B(&conditions_on_positions_validated, ne);
       }
       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
-      __ B(slow_path->GetEntryLabel(), gt);
+      __ B(intrinsic_slow_path->GetEntryLabel(), gt);
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
@@ -2048,19 +2228,19 @@
     }
     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
-    __ B(slow_path->GetEntryLabel(), lt);
+    __ B(intrinsic_slow_path->GetEntryLabel(), lt);
   }
 
   __ Bind(&conditions_on_positions_validated);
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
-    __ Cbz(src, slow_path->GetEntryLabel());
+    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
-    __ Cbz(dest, slow_path->GetEntryLabel());
+    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
   }
 
   // We have already checked in the LocationsBuilder for the constant case.
@@ -2068,19 +2248,18 @@
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
     // If the length is negative, bail out.
-    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
     // If the length >= 128 then (currently) prefer native implementation.
     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
-    __ B(slow_path->GetEntryLabel(), ge);
+    __ B(intrinsic_slow_path->GetEntryLabel(), ge);
   }
   // Validity checks: source.
   CheckSystemArrayCopyPosition(masm,
                                src_pos,
                                src,
                                length,
-                               slow_path,
+                               intrinsic_slow_path,
                                temp1,
-                               temp2,
                                optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -2088,91 +2267,236 @@
                                dest_pos,
                                dest,
                                length,
-                               slow_path,
+                               intrinsic_slow_path,
                                temp1,
-                               temp2,
                                optimizations.GetCountIsDestinationLength());
   {
     // We use a block to end the scratch scope before the write barrier, thus
     // freeing the temporary registers so they can be used in `MarkGCCard`.
     UseScratchRegisterScope temps(masm);
+    // Note: Because it is acquired from VIXL's scratch register pool,
+    // `temp3` might be IP0, and thus cannot be used as `ref` argument
+    // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+    // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
     Register temp3 = temps.AcquireW();
+
     if (!optimizations.GetDoesNotNeedTypeCheck()) {
       // Check whether all elements of the source array are assignable to the component
       // type of the destination array. We do two checks: the classes are the same,
       // or the destination is Object[]. If none of these checks succeed, we go to the
       // slow path.
-      __ Ldr(temp1, MemOperand(dest, class_offset));
-      __ Ldr(temp2, MemOperand(src, class_offset));
-      bool did_unpoison = false;
-      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-          !optimizations.GetSourceIsNonPrimitiveArray()) {
-        // One or two of the references need to be unpoisoned. Unpoison them
-        // both to make the identity check valid.
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
-        did_unpoison = true;
-      }
 
-      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
-        // Bail out if the destination is not a non primitive array.
-        // /* HeapReference<Class> */ temp3 = temp1->component_type_
-        __ Ldr(temp3, HeapOperand(temp1, component_offset));
-        __ Cbz(temp3, slow_path->GetEntryLabel());
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
-        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
-        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp3, slow_path->GetEntryLabel());
-      }
-
-      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
-        // Bail out if the source is not a non primitive array.
-        // /* HeapReference<Class> */ temp3 = temp2->component_type_
-        __ Ldr(temp3, HeapOperand(temp2, component_offset));
-        __ Cbz(temp3, slow_path->GetEntryLabel());
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
-        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
-        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-        __ Cbnz(temp3, slow_path->GetEntryLabel());
-      }
-
-      __ Cmp(temp1, temp2);
-
-      if (optimizations.GetDestinationIsTypedObjectArray()) {
-        vixl::Label do_copy;
-        __ B(&do_copy, eq);
-        if (!did_unpoison) {
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+          // /* HeapReference<Class> */ temp1 = src->klass_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          src.W(),
+                                                          class_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          // Bail out if the source is not a non primitive array.
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+          // If heap poisoning is enabled, `temp1` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+          // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+          __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
         }
-        // /* HeapReference<Class> */ temp1 = temp1->component_type_
-        __ Ldr(temp1, HeapOperand(temp1, component_offset));
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-        // /* HeapReference<Class> */ temp1 = temp1->super_class_
-        __ Ldr(temp1, HeapOperand(temp1, super_offset));
-        // No need to unpoison the result, we're comparing against null.
-        __ Cbnz(temp1, slow_path->GetEntryLabel());
-        __ Bind(&do_copy);
+
+        // /* HeapReference<Class> */ temp1 = dest->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp1_loc,
+                                                        dest.W(),
+                                                        class_offset,
+                                                        temp2,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+
+        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+          // Bail out if the destination is not a non primitive array.
+          //
+          // Register `temp1` is not trashed by the read barrier emitted
+          // by GenerateFieldLoadWithBakerReadBarrier below, as that
+          // method produces a call to a ReadBarrierMarkRegX entry point,
+          // which saves all potentially live registers, including
+          // temporaries such as `temp1`.
+          // /* HeapReference<Class> */ temp2 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp2_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp3,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+          // If heap poisoning is enabled, `temp2` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+          // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+          __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        // For the same reason given earlier, `temp1` is not trashed by the
+        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+        // /* HeapReference<Class> */ temp2 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp2_loc,
+                                                        src.W(),
+                                                        class_offset,
+                                                        temp3,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+        __ Cmp(temp1, temp2);
+
+        if (optimizations.GetDestinationIsTypedObjectArray()) {
+          vixl::aarch64::Label do_copy;
+          __ B(&do_copy, eq);
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                          temp1_loc,
+                                                          temp1,
+                                                          component_offset,
+                                                          temp2,
+                                                          /* needs_null_check */ false,
+                                                          /* use_load_acquire */ false);
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          // We do not need to emit a read barrier for the following
+          // heap reference load, as `temp1` is only used in a
+          // comparison with null below, and this reference is not
+          // kept afterwards.
+          __ Ldr(temp1, HeapOperand(temp1, super_offset));
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+          __ Bind(&do_copy);
+        } else {
+          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+        }
       } else {
-        __ B(slow_path->GetEntryLabel(), ne);
+        // Non read barrier code.
+
+        // /* HeapReference<Class> */ temp1 = dest->klass_
+        __ Ldr(temp1, MemOperand(dest, class_offset));
+        // /* HeapReference<Class> */ temp2 = src->klass_
+        __ Ldr(temp2, MemOperand(src, class_offset));
+        bool did_unpoison = false;
+        if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+            !optimizations.GetSourceIsNonPrimitiveArray()) {
+          // One or two of the references need to be unpoisoned. Unpoison them
+          // both to make the identity check valid.
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+          did_unpoison = true;
+        }
+
+        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+          // Bail out if the destination is not a non primitive array.
+          // /* HeapReference<Class> */ temp3 = temp1->component_type_
+          __ Ldr(temp3, HeapOperand(temp1, component_offset));
+          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+          // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+          // Bail out if the source is not a non primitive array.
+          // /* HeapReference<Class> */ temp3 = temp2->component_type_
+          __ Ldr(temp3, HeapOperand(temp2, component_offset));
+          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+          // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+        }
+
+        __ Cmp(temp1, temp2);
+
+        if (optimizations.GetDestinationIsTypedObjectArray()) {
+          vixl::aarch64::Label do_copy;
+          __ B(&do_copy, eq);
+          if (!did_unpoison) {
+            codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          }
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ Ldr(temp1, HeapOperand(temp1, component_offset));
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ Ldr(temp1, HeapOperand(temp1, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+          __ Bind(&do_copy);
+        } else {
+          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+        }
       }
     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
       // Bail out if the source is not a non primitive array.
-      // /* HeapReference<Class> */ temp1 = src->klass_
-      __ Ldr(temp1, HeapOperand(src.W(), class_offset));
-      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-      // /* HeapReference<Class> */ temp3 = temp1->component_type_
-      __ Ldr(temp3, HeapOperand(temp1, component_offset));
-      __ Cbz(temp3, slow_path->GetEntryLabel());
-      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
-      __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp1_loc,
+                                                        src.W(),
+                                                        class_offset,
+                                                        temp2,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                        temp2_loc,
+                                                        temp1,
+                                                        component_offset,
+                                                        temp3,
+                                                        /* needs_null_check */ false,
+                                                        /* use_load_acquire */ false);
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        __ Ldr(temp2, HeapOperand(temp1, component_offset));
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+      }
+      // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+      __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp3, slow_path->GetEntryLabel());
+      __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
     }
 
     Register src_curr_addr = temp1.X();
     Register dst_curr_addr = temp2.X();
-    Register src_stop_addr = temp3.X();
+    Register src_stop_addr;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Temporary register IP0, obtained from the VIXL scratch
+      // register pool as `temp3`, cannot be used in
+      // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+      // register is clobbered by ReadBarrierMarkRegX entry points).
+      // So another temporary register, allocated by the register
+      // allocator, is used instead.
+      DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+      src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+    } else {
+      src_stop_addr = temp3.X();
+    }
 
     GenSystemArrayCopyAddresses(masm,
                                 Primitive::kPrimNot,
@@ -2185,31 +2509,140 @@
                                 dst_curr_addr,
                                 src_stop_addr);
 
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison, nor do any read barrier as the next uses of the destination
-    // array will do it.
-    vixl::Label loop, done;
     const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-    __ Bind(&loop);
-    __ Cmp(src_curr_addr, src_stop_addr);
-    __ B(&done, eq);
-    {
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   if (src_ptr != end_ptr) {
+      //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+      //     if (is_gray) {
+      //       // Slow-path copy.
+      //       do {
+      //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //       } while (src_ptr != end_ptr)
+      //     } else {
+      //       // Fast-path copy.
+      //       do {
+      //         *dest_ptr++ = *src_ptr++;
+      //       } while (src_ptr != end_ptr)
+      //     }
+      //   }
+
+      vixl::aarch64::Label loop, done;
+
+      // Don't enter copy loop if `length == 0`.
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&done, eq);
+
       Register tmp = temps.AcquireW();
-      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, vixl::PostIndex));
-      __ Str(tmp, MemOperand(dst_curr_addr, element_size, vixl::PostIndex));
+      // Make sure `tmp` is not IP0, as it is clobbered by
+      // ReadBarrierMarkRegX entry points in
+      // ReadBarrierSystemArrayCopySlowPathARM64.
+      DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+      // /* int32_t */ monitor = src->monitor_
+      __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including rb_state,
+      // to prevent load-load reordering, and without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `tmp`.
+      __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+      // Slow path used to copy array when `src` is gray.
+      SlowPathCodeARM64* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the rb_state.
+      static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+      static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+      __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+      // Fast-path copy.
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      __ Bind(&loop);
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&loop, ne);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
+      __ Bind(&done);
+    } else {
+      // Non read barrier code.
+
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl::aarch64::Label loop, done;
+      __ Bind(&loop);
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&done, eq);
+      {
+        Register tmp = temps.AcquireW();
+        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      }
+      __ B(&loop);
+      __ Bind(&done);
     }
-    __ B(&loop);
-    __ Bind(&done);
   }
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
 
-  __ Bind(slow_path->GetExitLabel());
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+static void GenIsInfinite(LocationSummary* locations,
+                          bool is64bit,
+                          MacroAssembler* masm) {
+  Operand infinity;
+  Register out;
+
+  if (is64bit) {
+    infinity = kPositiveInfinityDouble;
+    out = XRegisterFrom(locations->Out());
+  } else {
+    infinity = kPositiveInfinityFloat;
+    out = WRegisterFrom(locations->Out());
+  }
+
+  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
+
+  MoveFPToInt(locations, is64bit, masm);
+  __ Eor(out, out, infinity);
+  // We don't care about the sign bit, so shift left.
+  __ Cmp(zero, Operand(out, LSL, 1));
+  __ Cset(out, eq);
+}
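The check above exploits the IEEE-754 encodings of infinity (kPositiveInfinityFloat / kPositiveInfinityDouble, i.e. 0x7F800000 and 0x7FF0000000000000): XOR-ing the raw bits with +infinity leaves zero for +infinity and only the sign bit for -infinity, so comparing the value shifted left by one against zero covers both signs. A self-contained C++ sketch of the same idea for the float case:

    #include <cstdint>
    #include <cstring>

    // True iff `f` is +infinity or -infinity, using the XOR-and-shift trick.
    bool IsInfiniteFloat(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      bits ^= UINT32_C(0x7F800000);   // kPositiveInfinityFloat
      return (bits << 1) == 0u;       // drop the sign bit, compare with zero
    }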
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index d47448a..5251536 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -20,10 +20,11 @@
 #include "intrinsics.h"
 
 namespace vixl {
+namespace aarch64 {
 
 class MacroAssembler;
 
-}  // namespace vixl
+}}  // namespace vixl::aarch64
 
 namespace art {
 
@@ -73,7 +74,7 @@
 #undef OPTIMIZING_INTRINSICS
 
  private:
-  vixl::MacroAssembler* GetVIXLAssembler();
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler();
 
   ArenaAllocator* GetAllocator();
 
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index dd9294d..db60238 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -107,6 +107,8 @@
   V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
+  V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 19c6a22..6e5eb66 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1872,58 +1872,10 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-// char java.lang.String.charAt(int index)
-void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // The inputs will be considered live at the last instruction and restored. This would overwrite
-  // the output with kNoOutputOverlap.
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  MipsAssembler* assembler = GetAssembler();
-
-  // Location of reference to data array
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register idx = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall,
-  //       though, I think it's not worth the cost.
-  // TODO: For simplicity, the index parameter is requested in a
-  //       register, so different from Quick we will not optimize the
-  //       code for constants (which would save a register).
-
-  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load the string size
-  __ Lw(TMP, obj, count_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  // Revert to slow path if idx is too large, or negative
-  __ Bgeu(idx, TMP, slow_path->GetEntryLabel());
-
-  // out = obj[2*idx].
-  __ Sll(TMP, idx, 1);                  // idx * 2
-  __ Addu(TMP, TMP, obj);               // Address of char at location idx
-  __ Lhu(out, TMP, value_offset);       // Load char at location idx
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1947,8 +1899,7 @@
   __ LoadFromOffset(kLoadWord,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize,
-                                            pStringCompareTo).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value());
   __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
@@ -2067,11 +2018,12 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check for code points > 0xFFFF. Either a slow-path check when we
-  // don't know statically, or directly dispatch if we have a constant.
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeMIPS* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
       // full slow-path down and branch unconditionally.
@@ -2081,7 +2033,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = locations->InAt(1).AsRegister<Register>();
     // The "bltu" conditional branch tests to see if the character value
     // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then
@@ -2106,7 +2058,7 @@
   __ LoadFromOffset(kLoadWord,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value());
   __ Jalr(T9);
   __ Nop();
 
@@ -2118,7 +2070,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2143,7 +2095,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2169,7 +2121,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2192,7 +2144,7 @@
   __ LoadFromOffset(kLoadWord,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromBytes).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value());
   __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2202,7 +2154,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2225,7 +2177,7 @@
   __ LoadFromOffset(kLoadWord,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromChars).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value());
   __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2234,7 +2186,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2254,7 +2206,7 @@
   __ LoadFromOffset(kLoadWord,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromString).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value());
   __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2283,10 +2235,10 @@
     // If one, or more, of the exponent bits is zero, then the number can't be infinite.
     if (type == Primitive::kPrimDouble) {
       __ MoveFromFpuHigh(TMP, in);
-      __ LoadConst32(AT, 0x7FF00000);
+      __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
     } else {
       __ Mfc1(TMP, in);
-      __ LoadConst32(AT, 0x7F800000);
+      __ LoadConst32(AT, kPositiveInfinityFloat);
     }
     __ Xor(TMP, TMP, AT);
 
@@ -2432,13 +2384,128 @@
   GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
 }
 
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister half = locations->GetTemp(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  MipsLabel done;
+  MipsLabel finite;
+  MipsLabel add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor.w.s(in);
+  //
+  // /*
+  //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
+  //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+  //  * too large to fit in a 32-bit integer.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-bit signed
+  //  * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
+  //  * and will no longer be processed by this "if" statement.
+  //  */
+  // if (out == Integer.MAX_VALUE) {
+  //   TMP = (in < 0.0f) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (IsR6()) {
+    __ CmpUnS(FTMP, in, in);
+  } else {
+    __ CunS(in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  if (IsR6()) {
+    __ Bc1nez(FTMP, &done);
+  } else {
+    __ Bc1t(&done);
+  }
+
+  // out = floor(in);
+  __ FloorWS(FTMP, in);
+  __ Mfc1(out, FTMP);
+
+  if (!IsR6()) {
+    __ LoadConst32(TMP, -1);
+  }
+
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
+  __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  __ Bne(AT, out, &finite);
+
+  __ Mtc1(ZERO, FTMP);
+  if (IsR6()) {
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(TMP, FTMP);
+  } else {
+    __ ColtS(in, FTMP);
+  }
+
+  __ B(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5f <= (in - out)) ? -1 : 0;
+  __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+  __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+  __ SubS(FTMP, in, FTMP);
+  __ Mtc1(AT, half);
+  if (IsR6()) {
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(TMP, FTMP);
+  } else {
+    __ ColeS(half, FTMP);
+  }
+
+  __ Bind(&add);
+
+  if (!IsR6()) {
+    __ Movf(TMP, ZERO);
+  }
+
+  // Return out -= TMP.
+  __ Subu(out, out, TMP);
+
+  __ Bind(&done);
+}
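In effect, the sequence above computes Math.round(float) as "floor, then add one when the remainder is at least 0.5", returning 0 for NaN; the Integer.MAX_VALUE special case only exists so that the pre-R6 floor.w.s overflow behaviour wraps to Integer.MIN_VALUE. A simplified C++ model that ignores the saturation/overflow handling:

    #include <cmath>
    #include <cstdint>

    // Rough model of the generated code: NaN -> 0, otherwise floor plus a
    // conditional increment when (in - floor(in)) >= 0.5.
    int32_t RoundFloat(float in) {
      if (std::isnan(in)) {
        return 0;
      }
      float f = std::floor(in);
      int32_t out = static_cast<int32_t>(f);
      if (in - f >= 0.5f) {
        out += 1;
      }
      return out;
    }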
+
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundFloat)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cf973aa..1e18540 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -385,6 +385,92 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenBitCount(LocationSummary* locations,
+                        const Primitive::Type type,
+                        Mips64Assembler* assembler) {
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities, this algorithm can be executed
+  // using 20 MIPS instructions (the calls to LoadConst32() generate two
+  // machine instructions each for the values being used in this algorithm).
+  // A(n unrolled) loop-based algorithm requires 25 instructions.
+  //
+  // For a 64-bit operand this can be performed in 24 instructions compared
+  // to a(n unrolled) loop based algorithm which requires 38 instructions.
+  //
+  // There are algorithms which are faster in the cases where very few
+  // bits are set but the algorithm here attempts to minimize the total
+  // number of instructions executed even when a large number of bits
+  // are set.
+
+  if (type == Primitive::kPrimInt) {
+    __ Srl(TMP, in, 1);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, TMP, AT);
+    __ Subu(TMP, in, TMP);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(out, TMP, AT);
+    __ Srl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Addu(TMP, out, TMP);
+    __ Srl(out, TMP, 4);
+    __ Addu(out, out, TMP);
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(out, out, AT);
+    __ LoadConst32(TMP, 0x01010101);
+    __ MulR6(out, out, TMP);
+    __ Srl(out, out, 24);
+  } else if (type == Primitive::kPrimLong) {
+    __ Dsrl(TMP, in, 1);
+    __ LoadConst64(AT, 0x5555555555555555L);
+    __ And(TMP, TMP, AT);
+    __ Dsubu(TMP, in, TMP);
+    __ LoadConst64(AT, 0x3333333333333333L);
+    __ And(out, TMP, AT);
+    __ Dsrl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Daddu(TMP, out, TMP);
+    __ Dsrl(out, TMP, 4);
+    __ Daddu(out, out, TMP);
+    __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+    __ And(out, out, AT);
+    __ LoadConst64(TMP, 0x0101010101010101L);
+    __ Dmul(out, out, TMP);
+    __ Dsrl32(out, out, 24);
+  }
+}
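The commented recipe is the classic SWAR population count from the Stanford bit-hacks page; for the 32-bit case the generated instruction sequence corresponds to the following C++ form, using the same masks the code loads into AT and TMP:

    #include <cstdint>

    // SWAR popcount for a 32-bit value.
    uint32_t BitCount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                  // 2-bit sums
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // 4-bit sums
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // 8-bit sums
      return (v * 0x01010101u) >> 24;                    // add the four bytes
    }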
+
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
@@ -790,6 +876,151 @@
   GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
 }
 
+static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+  Mips64Label done;
+  Mips64Label finite;
+  Mips64Label add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor(in);
+  //
+  // /*
+  //  * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-/64-bit
+  //  * signed integer will be processed by floor.X.Y to output
+  //  * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
+  //  * processed by this "if" statement.
+  //  *
+  //  * However, this bug in the 64-bit MIPS emulator causes the
+  //  * behavior of floor.X.Y to be the same as pre-R6 implementations
+  //  * of MIPS64.  When that bug is fixed this logic should be amended.
+  //  */
+  // if (out == MAX_VALUE) {
+  //   TMP = (in < 0.0) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * MAX_VALUE to MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (type == Primitive::kPrimDouble) {
+    __ CmpUnD(FTMP, in, in);
+  } else {
+    __ CmpUnS(FTMP, in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  __ Bc1nez(FTMP, &done);
+
+  // out = floor(in);
+  if (type == Primitive::kPrimDouble) {
+    __ FloorLD(FTMP, in);
+    __ Dmfc1(out, FTMP);
+  } else {
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
+  }
+
+  // AT = (out == MAX_VALUE) ? -1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+  } else {
+    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  }
+  __ Bnec(AT, out, &finite);
+
+  if (type == Primitive::kPrimDouble) {
+    __ Dmtc1(ZERO, FTMP);
+    __ CmpLtD(FTMP, in, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Mtc1(ZERO, FTMP);
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bc(&add);
+
+  __ Bind(&finite);
+
+  // AT = (0.5 <= (in - out)) ? -1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ Cvtdl(FTMP, FTMP);  // Convert output of floor.l.d back to "double".
+    __ LoadConst64(AT, bit_cast<int64_t, double>(0.5));
+    __ SubD(FTMP, in, FTMP);
+    __ Dmtc1(AT, half);
+    __ CmpLeD(FTMP, half, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bind(&add);
+
+  // Return out -= AT.
+  if (type == Primitive::kPrimDouble) {
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Subu(out, out, AT);
+  }
+
+  __ Bind(&done);
+}
+
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat);
+}
+
+// long java.lang.Math.round(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble);
+}
+
 // byte libcore.io.Memory.peekByte(long address)
 void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
@@ -1285,56 +1516,10 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-// char java.lang.String.charAt(int index)
-void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
-
-  // Location of reference to data array
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
-  // TODO: Maybe we can support range check elimination. Overall,
-  //       though, I think it's not worth the cost.
-  // TODO: For simplicity, the index parameter is requested in a
-  //       register, so different from Quick we will not optimize the
-  //       code for constants (which would save a register).
-
-  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load the string size
-  __ Lw(TMP, obj, count_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  // Revert to slow path if idx is too large, or negative
-  __ Bgeuc(idx, TMP, slow_path->GetEntryLabel());
-
-  // out = obj[2*idx].
-  __ Sll(TMP, idx, 1);                  // idx * 2
-  __ Daddu(TMP, TMP, obj);              // Address of char at location idx
-  __ Lhu(out, TMP, value_offset);       // Load char at location idx
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1358,7 +1543,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pStringCompareTo).Int32Value());
   __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
@@ -1477,11 +1662,12 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check for code points > 0xFFFF. Either a slow-path check when we
-  // don't know statically, or directly dispatch if we have a constant.
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
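+  // (A char-typed input can never exceed 0xFFFF, so it never needs the slow-path check.)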
   SlowPathCodeMIPS64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
       // full slow-path down and branch unconditionally.
@@ -1491,7 +1677,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>();
     __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
     slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
@@ -1508,7 +1694,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pIndexOf).Int32Value());
   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
   __ Jalr(T9);
   __ Nop();
@@ -1521,7 +1707,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1542,7 +1728,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1562,7 +1748,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1585,7 +1771,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
                                             pAllocStringFromBytes).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ Jalr(T9);
@@ -1597,7 +1783,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1619,7 +1805,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
                                             pAllocStringFromChars).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ Jalr(T9);
@@ -1630,7 +1816,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1650,7 +1836,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     T9,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
                                             pAllocStringFromString).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ Jalr(T9);
@@ -1693,11 +1879,83 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongBitCount)
+static void GenHighestOneBit(LocationSummary* locations,
+                             Primitive::Type type,
+                             Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ Dclz(TMP, in);
+    __ LoadConst64(AT, INT64_C(0x8000000000000000));
+    __ Dsrlv(out, AT, TMP);
+  } else {
+    __ Clz(TMP, in);
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(out, AT, TMP);
+  }
+  // For either value of "type", when "in" is zero, "out" should also
+  // be zero. Without this extra "and" operation, when "in" is zero,
+  // "out" would be either Integer.MIN_VALUE, or Long.MIN_VALUE because
+  // the MIPS logical shift operations "dsrlv" and "srlv" don't use
+  // the shift amount (TMP) directly; they use either (TMP % 64) or
+  // (TMP % 32), respectively.
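+  // For example (int): in = 0x50 -> clz = 25 -> out = 0x80000000 >> 25 = 0x40,
+  // and 0x40 & 0x50 = 0x40, the highest one bit. For in = 0: clz = 32, the
+  // shift amount becomes 32 % 32 = 0, so out would be 0x80000000; the final
+  // "and" with zero yields the expected 0.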
+  __ And(out, out, in);
+}
+
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+                            Primitive::Type type,
+                            Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
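+  // Isolate the lowest set bit via out = (-in) & in. For example (int):
+  // in = 0x58 -> -in = 0xFFFFFFA8 -> out = 0x08, the lowest one bit;
+  // in = 0 yields 0.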
+  if (type == Primitive::kPrimLong) {
+    __ Dsubu(TMP, ZERO, in);
+  } else {
+    __ Subu(TMP, ZERO, in);
+  }
+  __ And(out, TMP, in);
+}
+
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
@@ -1722,11 +1980,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongLowestOneBit)
-
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4aab3e2..cf4a040 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -60,19 +60,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -83,6 +70,105 @@
 
 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    Register src = locations->InAt(0).AsRegister<Register>();
+    Location src_pos = locations->InAt(1);
+    Register dest = locations->InAt(2).AsRegister<Register>();
+    Location dest_pos = locations->InAt(3);
+    Location length = locations->InAt(4);
+    Location temp1_loc = locations->GetTemp(0);
+    Register temp1 = temp1_loc.AsRegister<Register>();
+    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+    Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+    __ Bind(GetEntryLabel());
+    // In this code path, registers `temp1`, `temp2`, and `temp3` are
+    // not used for the base source address, the base destination
+    // address, and the end source address (respectively), as in other
+    // SystemArrayCopy intrinsic code paths.  Instead they are used,
+    // respectively, for:
+    // - the loop index (`i`);
+    // - the source index (`src_index`) and the loaded (source)
+    //   reference (`value`); and
+    // - the destination index (`dest_index`).
+
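+    // In pseudocode, the loop below performs:
+    //   for (i = 0; i != length; ++i) {
+    //     dest_array[dest_pos + i] =
+    //         MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+    //   }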
+    // i = 0
+    __ xorl(temp1, temp1);
+    NearLabel loop;
+    __ Bind(&loop);
+    // value = src_array[i + src_pos]
+    if (src_pos.IsConstant()) {
+      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+      int32_t adjusted_offset = offset + constant * element_size;
+      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+    } else {
+      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+    }
+    __ MaybeUnpoisonHeapReference(temp2);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // value = ReadBarrier::Mark(value)
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+    // explanations.)
+    DCHECK_NE(temp2, ESP);
+    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(temp2);
+    // dest_array[i + dest_pos] = value
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      int32_t adjusted_offset = offset + constant * element_size;
+      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+    } else {
+      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+    }
+    // ++i
+    __ addl(temp1, Immediate(1));
+    // if (i != length) goto loop
+    x86_codegen->GenerateIntCompare(temp1_loc, length);
+    __ j(kNotEqual, &loop);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -719,7 +805,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -765,20 +851,20 @@
   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
 }
 
-// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
-// as it needs 64 bit instructions.
 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (!kRoundIsPlusPointFive) {
-    return;
-  }
-
   // Do we have instruction support?
   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+    DCHECK(static_or_direct != nullptr);
     LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                               LocationSummary::kNoCall,
                                                               kIntrinsified);
     locations->SetInAt(0, Location::RequiresFpuRegister());
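+    // The optional extra input is the X86ComputeBaseMethodAddress node; when
+    // present, it supplies the base register used for constant-area (literal)
+    // loads in the code generator below.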
+    if (static_or_direct->HasSpecialInput() &&
+        invoke->InputAt(
+            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+      locations->SetInAt(1, Location::RequiresRegister());
+    }
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
@@ -787,7 +873,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                            LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(EAX));
@@ -797,54 +883,61 @@
 
 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  if (locations->WillCall()) {
+  if (locations->WillCall()) {  // TODO: can we reach this?
     InvokeOutOfLineIntrinsic(codegen_, invoke);
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   Register out = locations->Out().AsRegister<Register>();
-  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  NearLabel skip_incr, done;
   X86Assembler* assembler = GetAssembler();
 
-  // Generate 0.5 into inPlusPointFive.
-  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
-  __ movd(inPlusPointFive, out);
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
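+  // For example: round(2.3f) -> floor = 2.0, 0.3 < 0.5 -> 2; round(2.5f) ->
+  // floor = 2.0, 0.5 >= 0.5 -> 3; round(-2.5f) -> floor = -3.0, 0.5 >= 0.5 -> -2.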
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+    // Direct constant area available.
+    Register constant_area = locations->InAt(1).AsRegister<Register>();
+    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+    __ Bind(&skip_incr);
+  } else {
+    // No constant area: go through stack.
+    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+    __ comiss(t2, Address(ESP, 4));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, Address(ESP, 0));
+    __ Bind(&skip_incr);
+    __ addl(ESP, Immediate(8));
+  }
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   __ movl(out, Immediate(kPrimIntMax));
-  // maxInt = int-to-float(out)
-  __ cvtsi2ss(maxInt, out);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, maxInt);
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
 
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -870,7 +963,7 @@
   }
 
   // Now do the actual call.
-  __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
+  __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry)));
 
   // Extract the return value from the FP stack.
   __ fstpl(Address(ESP, 0));
@@ -998,7 +1091,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1030,48 +1123,6 @@
   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
-void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register idx = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  X86Assembler* assembler = GetAssembler();
-
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
-
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   // We need at least two of the positions or length to be an integer constant,
   // or else we won't have enough free registers.
@@ -1125,30 +1176,45 @@
 static void CheckPosition(X86Assembler* assembler,
                           Location pos,
                           Register input,
-                          Register length,
+                          Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
-                          Register temp) {
-  // Where is the length in the String?
+                          Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<Register>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     Register pos_reg = pos.AsRegister<Register>();
@@ -1162,7 +1228,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<Register>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
@@ -1214,11 +1284,11 @@
     __ movl(count, length.AsRegister<Register>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
 
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -1252,7 +1322,7 @@
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1273,7 +1343,7 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo)));
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1319,11 +1389,11 @@
     __ j(kEqual, &return_false);
   }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
   if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
     __ movl(ecx, Address(str, class_offset));
     __ cmpl(ecx, Address(arg, class_offset));
     __ j(kNotEqual, &return_false);
@@ -1418,10 +1488,11 @@
   DCHECK_EQ(out, EDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
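+  // (A char-typed input can never exceed 0xFFFF, so it never needs the slow-path check.)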
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1431,7 +1502,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
     codegen->AddSlowPath(slow_path);
@@ -1525,7 +1596,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1545,7 +1616,7 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes)));
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
@@ -1553,7 +1624,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1571,14 +1642,14 @@
   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   //
   // all include a null check on `data` before calling that method.
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars)));
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1595,7 +1666,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+  __ fs()->call(
+      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString)));
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
@@ -1836,7 +1908,7 @@
 
 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
-  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
+  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
 }
 
 static void GenUnsafeGet(HInvoke* invoke,
@@ -1862,9 +1934,9 @@
       Register output = output_loc.AsRegister<Register>();
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
-          Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1919,17 +1991,13 @@
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
     } else {
       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
     }
   } else {
-    locations->SetOut(Location::RequiresRegister());
-  }
-  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(),
+                      can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
@@ -2150,9 +2218,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2277,6 +2345,15 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -2630,9 +2707,528 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  Register temp = temp_loc.AsRegister<Register>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check the static flags that prevent us from using the intrinsic.
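+  // When the two byte-sized flags are adjacent in memory, a single 16-bit
+  // compare (cmpw) checks both of them at once.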
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
+  return instruction->InputAt(input0) == instruction->InputAt(input1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  if (invoke->GetLocations() != nullptr) {
+    // Need a byte register for marking.
+    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
+
+    static constexpr size_t kSrc = 0;
+    static constexpr size_t kSrcPos = 1;
+    static constexpr size_t kDest = 2;
+    static constexpr size_t kDestPos = 3;
+    static constexpr size_t kLength = 4;
+
+    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
+        !invoke->InputAt(kDestPos)->IsIntConstant() &&
+        !invoke->InputAt(kLength)->IsIntConstant()) {
+      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
+          !IsSameInput(invoke, kSrcPos, kLength) &&
+          !IsSameInput(invoke, kDestPos, kLength) &&
+          !IsSameInput(invoke, kSrc, kDest)) {
+        // Not enough registers, make the length also take a stack slot.
+        invoke->GetLocations()->SetInAt(kLength, Location::Any());
+      }
+    }
+  }
+}
+
+void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location src_pos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location dest_pos = locations->InAt(3);
+  Location length_arg = locations->InAt(4);
+  Location length = length_arg;
+  Location temp1_loc = locations->GetTemp(0);
+  Register temp1 = temp1_loc.AsRegister<Register>();
+  Location temp2_loc = locations->GetTemp(1);
+  Register temp2 = temp2_loc.AsRegister<Register>();
+
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  NearLabel conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ cmpl(src, dest);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ cmpl(src, dest);
+        __ j(kNotEqual, &conditions_on_positions_validated);
+      }
+      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
+      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ cmpl(src, dest);
+      __ j(kNotEqual, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  Location temp3_loc = locations->GetTemp(2);
+  Register temp3 = temp3_loc.AsRegister<Register>();
+  if (length.IsStackSlot()) {
+    __ movl(temp3, Address(ESP, length.GetStackIndex()));
+    length = Location::RegisterLocation(temp3);
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
+    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+        __ testl(temp1, temp1);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        __ movl(temp1, Address(src, class_offset));
+        __ MaybeUnpoisonHeapReference(temp1);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
+        __ testl(temp1, temp1);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+    }
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (length.Equals(Location::RegisterLocation(temp3))) {
+        // When Baker read barriers are enabled, register `temp3`,
+        // which in the present case contains the `length` parameter,
+        // will be overwritten below.  Make the `length` location
+        // reference the original stack location; it will be moved
+        // back to `temp3` later if necessary.
+        DCHECK(length_arg.IsStackSlot());
+        length = length_arg;
+      }
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+        __ testl(temp2, temp2);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ cmpl(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        NearLabel do_copy;
+        __ j(kEqual, &do_copy);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ movl(temp1, Address(dest, class_offset));
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        __ MaybeUnpoisonHeapReference(temp1);
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        __ movl(temp2, Address(temp1, component_offset));
+        __ testl(temp2, temp2);
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(temp2);
+        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        // Re-poison the heap reference to make the compare instruction below
+        // compare two poisoned references.
+        __ PoisonHeapReference(temp1);
+      }
+
+      // Note: if heap poisoning is on, we are comparing two poisoned references here.
+      __ cmpl(temp1, Address(src, class_offset));
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        NearLabel do_copy;
+        __ j(kEqual, &do_copy);
+        __ MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+      }
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+      __ testl(temp1, temp1);
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ movl(temp1, Address(src, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      __ movl(temp1, Address(temp1, component_offset));
+      __ testl(temp1, temp1);
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp1);
+    }
+    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Compute the base source address in `temp1`.
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  DCHECK_EQ(element_size, 4);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // If it is needed (in the case of the fast-path loop), the base
+    // destination address is computed later, as `temp2` is used for
+    // intermediate computations.
+
+    // Compute the end source address in `temp3`.
+    if (length.IsConstant()) {
+      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp3, Address(temp1, element_size * constant));
+    } else {
+      if (length.IsStackSlot()) {
+        // Location `length` is again pointing at a stack slot, as
+        // register `temp3` (which contained the length parameter
+        // earlier) has been overwritten; restore it now.
+        DCHECK(length.Equals(length_arg));
+        __ movl(temp3, Address(ESP, length.GetStackIndex()));
+        length = Location::RegisterLocation(temp3);
+      }
+      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+    }
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       for (size_t i = 0; i != length; ++i) {
+    //         dest_array[dest_pos + i] =
+    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+    //       }
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    NearLabel loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+    // if (rb_state == ReadBarrier::gray_ptr_)
+    //   goto slow_path;
+    // At this point, just do the "if" and make sure that flags are preserved until the branch.
+    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+    // Load fence to prevent load-load reordering.
+    // Note that this is a no-op, thanks to the x86 memory model.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // We have done the "if" of the gray bit check above; now branch based on the flags.
+    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+
+    // Set the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp2, Address(dest, element_size * constant + offset));
+    } else {
+      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+    __ pushl(Address(temp1, 0));
+    __ cfi().AdjustCFAOffset(4);
+    __ popl(Address(temp2, 0));
+    __ cfi().AdjustCFAOffset(-4);
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp2, Address(dest, element_size * constant + offset));
+    } else {
+      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+    }
+
+    // Compute the end source address in `temp3`.
+    if (length.IsConstant()) {
+      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+      __ leal(temp3, Address(temp1, element_size * constant));
+    } else {
+      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    NearLabel loop, done;
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+    __ Bind(&loop);
+    __ pushl(Address(temp1, 0));
+    __ cfi().AdjustCFAOffset(4);
+    __ popl(Address(temp2, 0));
+    __ cfi().AdjustCFAOffset(-4);
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+    __ Bind(&done);
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       Register(kNoRegister),
+                       /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
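
Note: as a reading aid for the x86 SystemArrayCopy intrinsic above, here is a plain C++ sketch of the control flow the generated code follows under Baker read barriers. ReadBarrierMarkSketch and SourceIsGraySketch are illustrative stand-ins (not ART APIs), and heap-reference poisoning is omitted.

// Opaque stand-in for a heap reference.
struct Object;

// Hypothetical helpers modelling the runtime behaviour, not real ART entry points.
static Object* ReadBarrierMarkSketch(Object* ref) { return ref; }
static bool SourceIsGraySketch(const Object* /* src_array */) { return false; }

// Conceptual shape of the emitted copy: one gray check on the source array
// selects either a raw copy (fast path) or a per-element marking copy (slow path).
static void SystemArrayCopySketch(const Object* src_array,
                                  Object** src_ptr, Object** end_ptr, Object** dest_ptr) {
  if (src_ptr == end_ptr) {
    return;  // length == 0: the copy loop is skipped entirely.
  }
  if (SourceIsGraySketch(src_array)) {
    do {  // Slow path: every element goes through the mark routine.
      *dest_ptr++ = ReadBarrierMarkSketch(*src_ptr++);
    } while (src_ptr != end_ptr);
  } else {
    do {  // Fast path: raw element-by-element copy, no per-element barrier.
      *dest_ptr++ = *src_ptr++;
    } while (src_ptr != end_ptr);
  }
}
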
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index f726a25..a4ee546 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -54,19 +54,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86_64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86_64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -77,6 +64,65 @@
 
 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
 
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+      : SlowPathCode(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+    __ Bind(GetEntryLabel());
+    NearLabel loop;
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    // TODO: Inline the mark bit check before calling the runtime?
+    // TMP = ReadBarrier::Mark(TMP);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    __ MaybePoisonHeapReference(CpuRegister(TMP));
+    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+    __ addl(src_curr_addr, Immediate(element_size));
+    __ addl(dst_curr_addr, Immediate(element_size));
+    __ cmpl(src_curr_addr, src_stop_addr);
+    __ j(kNotEqual, &loop);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
 #define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -539,7 +585,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -596,12 +642,13 @@
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+    locations->AddTemp(Location::RequiresFpuRegister());
     return;
   }
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -610,10 +657,7 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
-  }
+  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -623,47 +667,41 @@
     return;
   }
 
-  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5f)
+  //    result = result + 1.0f;
+  __ movss(t2, in);
+  __ roundss(t1, in, Immediate(1));
+  __ subss(t2, t1);
+  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
+  __ j(kBelow, &skip_incr);
+  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addss(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxInt into out.
-  codegen_->Load64BitValue(out, kPrimIntMax);
-
-  // if inPlusPointFive >= maxInt goto done
-  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = float-to-int-truncate(input)
-  __ cvttss2si(out, inPlusPointFive);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
+  codegen_->Load32BitValue(out, kPrimIntMax);
+  __ cvtsi2ss(t2, out);
+  __ comiss(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttss2si(out, t1);
   __ Bind(&done);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
-  }
+  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
@@ -673,46 +711,43 @@
     return;
   }
 
-  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  NearLabel done, nan;
+  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  NearLabel skip_incr, done;
   X86_64Assembler* assembler = GetAssembler();
 
-  // Load 0.5 into inPlusPointFive.
-  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
+  // Since no direct x86 rounding instruction matches the required semantics,
+  // this intrinsic is implemented as follows:
+  //  result = floor(in);
+  //  if (in - result >= 0.5)
+  //    result = result + 1.0;
+  __ movsd(t2, in);
+  __ roundsd(t1, in, Immediate(1));
+  __ subsd(t2, t1);
+  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
+  __ j(kBelow, &skip_incr);
+  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
+  __ Bind(&skip_incr);
 
-  // Add in the input.
-  __ addsd(inPlusPointFive, in);
-
-  // And truncate to an integer.
-  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
-
-  // Load maxLong into out.
+  // Final conversion to an integer. Unfortunately this also does not have a
+  // direct x86 instruction, since NaN should map to 0 and large positive
+  // values need to be clipped to the extreme value.
   codegen_->Load64BitValue(out, kPrimLongMax);
-
-  // if inPlusPointFive >= maxLong goto done
-  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
-  __ j(kAboveEqual, &done);
-
-  // if input == NaN goto nan
-  __ j(kUnordered, &nan);
-
-  // output = double-to-long-truncate(input)
-  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
-  __ jmp(&done);
-  __ Bind(&nan);
-
-  //  output = 0
-  __ xorl(out, out);
+  __ cvtsi2sd(t2, out, /* is64bit */ true);
+  __ comisd(t1, t2);
+  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
+  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
+  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
+  __ cvttsd2si(out, t1, /* is64bit */ true);
   __ Bind(&done);
 }
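
Note: the reworked VisitMathRoundFloat/VisitMathRoundDouble sequences above implement rounding as floor plus a conditional increment, then clamp. A plain C++ reference of the intended semantics follows (not ART code; the negative-range saturation is handled implicitly by cvttss2si/cvttsd2si in the generated code and is only spelled out here to keep the C++ well defined).

#include <cmath>
#include <cstdint>
#include <limits>

// Reference semantics for the float variant; the double variant is analogous.
int32_t RoundFloatReference(float in) {
  if (std::isnan(in)) {
    return 0;                            // NaN maps to 0.
  }
  float result = std::floor(in);         // roundss with rounding mode 1 (toward -infinity)
  if (in - result >= 0.5f) {
    result += 1.0f;                      // conditional increment (comiss + addss)
  }
  if (result >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // clip large positive values
  }
  if (result <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();  // handled by cvttss2si saturation in the asm
  }
  return static_cast<int32_t>(result);   // cvttss2si
}
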
 
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -733,7 +768,7 @@
   DCHECK(invoke->IsInvokeStaticOrDirect());
   X86_64Assembler* assembler = codegen->GetAssembler();
 
-  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
+  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true));
   codegen->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
@@ -852,7 +887,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -891,49 +926,6 @@
   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
-void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  X86_64Assembler* assembler = GetAssembler();
-
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
-
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   // Check to see if we have known failures that will cause us to have to bail out
   // to the runtime, and just generate the runtime call directly.
@@ -978,7 +970,6 @@
                           CpuRegister input,
                           Location length,
                           SlowPathCode* slow_path,
-                          CpuRegister input_len,
                           CpuRegister temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -999,12 +990,11 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
       if (length.IsConstant()) {
         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
       } else {
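
Note: with the extra temp dropped from CheckPosition, the constant-`pos` case above now materializes length(input) - pos with a movl/subl pair and reuses it for both checks. Roughly, in plain C++ (illustrative only, not the generated code):

#include <cstdint>

bool PositionIsValidSketch(int32_t input_length, int32_t pos_const, int32_t copy_length) {
  int32_t remaining = input_length - pos_const;  // movl + subl in the new sequence
  if (remaining < 0) {
    return false;                   // pos is past the end of the array -> slow path
  }
  return remaining >= copy_length;  // enough elements left after pos to copy
}
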
@@ -1079,11 +1069,11 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);
 
   // We need the count in RCX.
   if (length.IsConstant()) {
@@ -1122,14 +1112,20 @@
 
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathX86_64
-// slow path.
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
   X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1137,18 +1133,23 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
   Location src_pos = locations->InAt(1);
   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
   Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
-  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
-  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
-  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+  Location temp1_loc = locations->GetTemp(0);
+  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+  Location temp2_loc = locations->GetTemp(1);
+  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+  Location temp3_loc = locations->GetTemp(2);
+  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+  Location TMP_loc = Location::RegisterLocation(TMP);
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
 
   NearLabel conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
@@ -1164,7 +1165,7 @@
         DCHECK_GE(src_pos_constant, dest_pos_constant);
       } else if (src_pos_constant < dest_pos_constant) {
         __ cmpl(src, dest);
-        __ j(kEqual, slow_path->GetEntryLabel());
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       }
     } else {
       if (!optimizations.GetDestinationIsSource()) {
@@ -1172,7 +1173,7 @@
         __ j(kNotEqual, &conditions_on_positions_validated);
       }
       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
-      __ j(kGreater, slow_path->GetEntryLabel());
+      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
@@ -1182,10 +1183,10 @@
     if (dest_pos.IsConstant()) {
       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
-      __ j(kLess, slow_path->GetEntryLabel());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
     } else {
       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
-      __ j(kLess, slow_path->GetEntryLabel());
+      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
     }
   }
 
@@ -1194,13 +1195,13 @@
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
     __ testl(src, src);
-    __ j(kEqual, slow_path->GetEntryLabel());
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
     // Bail out if the destination is null.
     __ testl(dest, dest);
-    __ j(kEqual, slow_path->GetEntryLabel());
+    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   // If the length is negative, bail out.
@@ -1209,7 +1210,7 @@
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
-    __ j(kLess, slow_path->GetEntryLabel());
+    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
   }
 
   // Validity checks: source.
@@ -1217,9 +1218,8 @@
                 src_pos,
                 src,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1227,9 +1227,8 @@
                 dest_pos,
                 dest,
                 length,
-                slow_path,
+                intrinsic_slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1237,38 +1236,80 @@
     // type of the destination array. We do two checks: the classes are the same,
     // or the destination is Object[]. If none of these checks succeed, we go to the
     // slow path.
-    __ movl(temp1, Address(dest, class_offset));
-    __ movl(temp2, Address(src, class_offset));
+
     bool did_unpoison = false;
-    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-        !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoison them
-      // both to make the identity check valid.
-      __ MaybeUnpoisonHeapReference(temp1);
-      __ MaybeUnpoisonHeapReference(temp2);
-      did_unpoison = true;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+      // Register `temp1` is not trashed by the read barrier emitted
+      // by GenerateFieldLoadWithBakerReadBarrier below, as that
+      // method produces a call to a ReadBarrierMarkRegX entry point,
+      // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+      // If heap poisoning is enabled, `temp1` and `temp2` have been
+      // unpoisoned by the previous calls to
+      // GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ movl(temp1, Address(dest, class_offset));
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ movl(temp2, Address(src, class_offset));
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
     }
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
-      // /* HeapReference<Class> */ TMP = temp1->component_type_
-      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
-      __ testl(CpuRegister(TMP), CpuRegister(TMP));
-      __ j(kEqual, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ TMP = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ TMP = temp1->component_type_
+        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      }
       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
 
     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
       // Bail out if the source is not a non primitive array.
-      // /* HeapReference<Class> */ TMP = temp2->component_type_
-      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
-      __ testl(CpuRegister(TMP), CpuRegister(TMP));
-      __ j(kEqual, slow_path->GetEntryLabel());
-      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // For the same reason given earlier, `temp1` is not trashed by the
+        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+        // /* HeapReference<Class> */ TMP = temp2->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      } else {
+        // /* HeapReference<Class> */ TMP = temp2->component_type_
+        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+        __ testl(CpuRegister(TMP), CpuRegister(TMP));
+        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      }
       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
 
     __ cmpl(temp1, temp2);
@@ -1276,39 +1317,61 @@
     if (optimizations.GetDestinationIsTypedObjectArray()) {
       NearLabel do_copy;
       __ j(kEqual, &do_copy);
-      if (!did_unpoison) {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
+      } else {
+        if (!did_unpoison) {
+          __ MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ movl(temp1, Address(temp1, component_offset));
         __ MaybeUnpoisonHeapReference(temp1);
+        // No need to unpoison the following heap reference load, as
+        // we're comparing against null.
+        __ cmpl(Address(temp1, super_offset), Immediate(0));
       }
-      // /* HeapReference<Class> */ temp1 = temp1->component_type_
-      __ movl(temp1, Address(temp1, component_offset));
-      __ MaybeUnpoisonHeapReference(temp1);
-      // /* HeapReference<Class> */ temp1 = temp1->super_class_
-      __ movl(temp1, Address(temp1, super_offset));
-      // No need to unpoison the result, we're comparing against null.
-      __ testl(temp1, temp1);
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
       __ Bind(&do_copy);
     } else {
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
     }
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
-    // /* HeapReference<Class> */ temp1 = src->klass_
-    __ movl(temp1, Address(src, class_offset));
-    __ MaybeUnpoisonHeapReference(temp1);
-    // /* HeapReference<Class> */ TMP = temp1->component_type_
-    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
-    __ testl(CpuRegister(TMP), CpuRegister(TMP));
-    __ j(kEqual, slow_path->GetEntryLabel());
-    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ TMP = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ movl(temp1, Address(src, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ TMP = temp1->component_type_
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      // No need to unpoison `TMP` now, as we're comparing against null.
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    }
     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
   // Compute base source address, base destination address, and end source address.
 
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1331,20 +1394,88 @@
     __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
   }
 
-  // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
-  NearLabel loop, done;
-  __ cmpl(temp1, temp3);
-  __ j(kEqual, &done);
-  __ Bind(&loop);
-  __ movl(CpuRegister(TMP), Address(temp1, 0));
-  __ movl(Address(temp2, 0), CpuRegister(TMP));
-  __ addl(temp1, Immediate(element_size));
-  __ addl(temp2, Immediate(element_size));
-  __ cmpl(temp1, temp3);
-  __ j(kNotEqual, &loop);
-  __ Bind(&done);
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    NearLabel loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+    // if (rb_state == ReadBarrier::gray_ptr_)
+    //   goto slow_path;
+    // At this point, just do the "if" and make sure that flags are preserved until the branch.
+    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+    // Load fence to prevent load-load reordering.
+    // Note that this is a no-op, thanks to the x86-64 memory model.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCode* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // We have done the "if" of the gray bit check above; now branch based on the flags.
+    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(temp1, 0));
+    __ movl(Address(temp2, 0), CpuRegister(TMP));
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    NearLabel loop, done;
+    __ cmpl(temp1, temp3);
+    __ j(kEqual, &done);
+    __ Bind(&loop);
+    __ movl(CpuRegister(TMP), Address(temp1, 0));
+    __ movl(Address(temp2, 0), CpuRegister(TMP));
+    __ addl(temp1, Immediate(element_size));
+    __ addl(temp2, Immediate(element_size));
+    __ cmpl(temp1, temp3);
+    __ j(kNotEqual, &loop);
+    __ Bind(&done);
+  }
 
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(temp1,
@@ -1353,12 +1484,12 @@
                        CpuRegister(kNoRegister),
                        /* value_can_be_null */ false);
 
-  __ Bind(slow_path->GetExitLabel());
+  __ Bind(intrinsic_slow_path->GetExitLabel());
 }
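
Note: in both the x86 and x86-64 sequences above, the gray check is a single testb against one byte of the object's lock word. The byte and bit positions are derived from LockWord::kReadBarrierStateShift as sketched below; the shift value used here is an assumption for illustration only; the real constant comes from LockWord.

#include <cstdint>

constexpr uint32_t kBitsPerByteSketch = 8;
constexpr uint32_t kReadBarrierStateShiftSketch = 28;  // assumed value, illustration only

// Which byte of the 32-bit monitor_ word holds the rb_state low bit ...
constexpr uint32_t gray_byte_position = kReadBarrierStateShiftSketch / kBitsPerByteSketch;
// ... and which bit inside that byte it is.
constexpr uint32_t gray_bit_position = kReadBarrierStateShiftSketch % kBitsPerByteSketch;
// Immediate for testb; a non-zero AND result (ZF clear) means rb_state is gray.
constexpr int32_t test_value = static_cast<int8_t>(1u << gray_bit_position);

// The emitted test is then, conceptually:
//   testb [src + monitor_offset + gray_byte_position], test_value
// followed by j(kNotZero, read_barrier_slow_path).
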
 
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1379,7 +1510,7 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo),
                                   /* no_rip */ true));
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1419,17 +1550,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(rcx, Address(str, class_offset));
-  __ cmpl(rcx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ movl(rcx, Address(str, class_offset));
+    __ cmpl(rcx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
@@ -1520,10 +1656,11 @@
   DCHECK_EQ(out.AsRegister(), RDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1533,7 +1670,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
     codegen->AddSlowPath(slow_path);
@@ -1626,7 +1763,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1646,7 +1783,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromBytes),
                                   /* no_rip */ true));
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1655,7 +1793,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1673,7 +1811,8 @@
   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
   //
   // all include a null check on `data` before calling that method.
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromChars),
                                   /* no_rip */ true));
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1681,7 +1820,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1698,7 +1837,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+                                                          pAllocStringFromString),
                                   /* no_rip */ true));
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1924,7 +2064,7 @@
 
 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
-  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
                                                     /* no_rip */ true));
 }
 
@@ -1949,9 +2089,9 @@
     case Primitive::kPrimNot: {
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
-          Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1974,9 +2114,7 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
-                                          HInvoke* invoke,
-                                          Primitive::Type type) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1988,31 +2126,27 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
-  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 
 
@@ -2175,9 +2309,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2293,6 +2427,15 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -2481,7 +2624,7 @@
                       : CTZ(static_cast<uint32_t>(value));
     }
     if (is_long) {
-      codegen->Load64BitValue(out, 1L << value);
+      codegen->Load64BitValue(out, 1ULL << value);
     } else {
       codegen->Load32BitValue(out, 1 << value);
     }
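
Note: the 1L -> 1ULL change above is a correctness fix for the 64-bit one-bit case: when value is 63, shifting a signed 1L left by 63 overflows and is undefined behaviour in C++, whereas the unsigned shift is well defined and yields the intended bit pattern. A minimal illustration:

#include <cstdint>

// Well defined for 0 <= value <= 63; with a signed 1L the value == 63 case would be UB.
uint64_t OneBitFromIndexSketch(int value) {
  return 1ULL << value;
}
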
@@ -2721,7 +2864,65 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
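Note: the ReferenceGetReferent fast path above checks two byte-sized class flags. When their offsets are adjacent, a single 16-bit cmpw against zero covers both bytes, which is what the slow_path_flag_offset == disable_flag_offset + 1 branch exploits. A C++ sketch of the equivalence (field names are illustrative, not the real Class layout):

#include <cstdint>
#include <cstring>

struct ReferenceFlagsSketch {
  uint8_t disable_intrinsic;  // stand-in for the byte at disable_flag_offset
  uint8_t slow_path_enabled;  // stand-in for the byte at disable_flag_offset + 1
};

bool MustTakeSlowPathSketch(const ReferenceFlagsSketch& flags) {
  uint16_t both;
  std::memcpy(&both, &flags.disable_intrinsic, sizeof(both));  // one 16-bit load, like cmpw
  return both != 0;  // true iff either flag byte is non-zero
}
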
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 7543cd6..a0ded74 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -30,8 +30,8 @@
 static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
   DCHECK(instruction->IsInLoop());
   HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-  for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-    HLoopInformation* input_loop = it.Current()->GetBlock()->GetLoopInformation();
+  for (const HInstruction* input : instruction->GetInputs()) {
+    HLoopInformation* input_loop = input->GetBlock()->GetLoopInformation();
     // We only need to check whether the input is defined in the loop. If it is not
     // it is defined before the loop.
     if (input_loop != nullptr && input_loop->IsIn(*info)) {
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index d446539..2a62643 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -169,13 +169,11 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with different types.
-  // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to
-  // avoid SsaBuilder's typing of ambiguous array operations from reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, int_constant_, Primitive::kPrimByte, 0);
+      parameter_, int_constant_, Primitive::kPrimInt, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, int_constant_, float_constant_, Primitive::kPrimShort, 0);
+      parameter_, int_constant_, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -189,13 +187,11 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with same types.
-  // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to
-  // avoid SsaBuilder's typing of ambiguous array operations from reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, int_constant_, Primitive::kPrimByte, 0);
+      parameter_, int_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, get_array, float_constant_, Primitive::kPrimByte, 0);
+      parameter_, get_array, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 874f3be..7347686 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -488,7 +488,7 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;
+  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
@@ -559,19 +559,20 @@
     }
 
     // At this point, stores in possibly_removed_stores_ can be safely removed.
-    size = possibly_removed_stores_.size();
-    for (size_t i = 0; i < size; i++) {
+    for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
       HInstruction* store = possibly_removed_stores_[i];
       DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
       store->GetBlock()->RemoveInstruction(store);
     }
 
-    // TODO: remove unnecessary allocations.
-    // Eliminate instructions in singleton_new_instances_ that:
-    // - don't have uses,
-    // - don't have finalizers,
-    // - are instantiable and accessible,
-    // - have no/separate clinit check.
+    // Eliminate allocations that are not used.
+    for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
+      HInstruction* new_instance = singleton_new_instances_[i];
+      if (!new_instance->HasNonEnvironmentUses()) {
+        new_instance->RemoveEnvironmentUsers();
+        new_instance->GetBlock()->RemoveInstruction(new_instance);
+      }
+    }
   }
 
  private:
@@ -972,8 +973,8 @@
     if (!heap_location_collector_.MayDeoptimize() &&
         ref_info->IsSingletonAndNotReturned() &&
         !new_instance->IsFinalizable() &&
-        !new_instance->CanThrow()) {
-      // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination.
+        !new_instance->NeedsAccessCheck()) {
+      singleton_new_instances_.push_back(new_instance);
     }
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[new_instance->GetBlock()->GetBlockId()];
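
The hunks above both enable removal of unused singleton allocations and tighten the tracking condition from CanThrow() to NeedsAccessCheck(). A hedged restatement of the eligibility check as a standalone predicate; the signature is hypothetical, and ReferenceInfo is assumed to be the pass's per-reference summary:

    // Hypothetical predicate: an allocation may be tracked for elimination only if the
    // pass saw no single-frame deoptimization, the reference never escapes or is
    // returned, it has no finalizer, and it needs no runtime access check.
    static bool CanTrackForElimination(HNewInstance* new_instance,
                                       ReferenceInfo* ref_info,
                                       bool may_deoptimize) {
      return !may_deoptimize &&
             ref_info->IsSingletonAndNotReturned() &&
             !new_instance->IsFinalizable() &&
             !new_instance->NeedsAccessCheck();
    }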
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 63bbc2c..5fdfb9b 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -38,7 +38,13 @@
 class Location : public ValueObject {
  public:
   enum OutputOverlap {
+    // The liveness of the output overlaps the liveness of one or
+    // several input(s); the register allocator cannot reuse an
+    // input's location for the output's location.
     kOutputOverlap,
+    // The liveness of the output does not overlap the liveness of any
+    // input; the register allocator is allowed to reuse an input's
+    // location for the output's location.
     kNoOutputOverlap
   };
 
@@ -370,6 +376,10 @@
     return PolicyField::Decode(GetPayload());
   }
 
+  bool RequiresRegisterKind() const {
+    return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister;
+  }
+
   uintptr_t GetEncoding() const {
     return GetPayload();
   }
@@ -474,8 +484,9 @@
  public:
   enum CallKind {
     kNoCall,
+    kCallOnMainAndSlowPath,
     kCallOnSlowPath,
-    kCall
+    kCallOnMainOnly
   };
 
   LocationSummary(HInstruction* instruction,
@@ -494,6 +505,10 @@
     return inputs_.size();
   }
 
+  // Set the output location.  Argument `overlaps` tells whether the
+  // output overlaps any of the inputs (if so, it cannot share the
+  // same register as one of the inputs); it is set to
+  // `Location::kOutputOverlap` by default for safety.
   void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
     DCHECK(output_.IsInvalid());
     output_overlaps_ = overlaps;
@@ -530,10 +545,29 @@
 
   Location Out() const { return output_; }
 
-  bool CanCall() const { return call_kind_ != kNoCall; }
-  bool WillCall() const { return call_kind_ == kCall; }
-  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
-  bool NeedsSafepoint() const { return CanCall(); }
+  bool CanCall() const {
+    return call_kind_ != kNoCall;
+  }
+
+  bool WillCall() const {
+    return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool CallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool OnlyCallsOnSlowPath() const {
+    return call_kind_ == kCallOnSlowPath;
+  }
+
+  bool CallsOnMainAndSlowPath() const {
+    return call_kind_ == kCallOnMainAndSlowPath;
+  }
+
+  bool NeedsSafepoint() const {
+    return CanCall();
+  }
 
   void SetStackBit(uint32_t index) {
     stack_mask_->SetBit(index);
@@ -619,8 +653,7 @@
   // Whether these are locations for an intrinsified call.
   bool intrinsified_;
 
-  ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+  friend class RegisterAllocatorTest;
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
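
A minimal sketch, not from this patch, of how a code-generator visitor might use the renamed call kinds and the documented output-overlap flag; `arena` and `instruction` are assumed to come from the surrounding visitor:

    static void SetUpLocationsSketch(ArenaAllocator* arena, HInstruction* instruction) {
      // Only the slow path calls into the runtime, so kCallOnSlowPath (not kCallOnMainOnly).
      LocationSummary* locations =
          new (arena) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
      locations->SetInAt(0, Location::RequiresRegister());
      // The output's liveness does not overlap any input's here, so the register
      // allocator may reuse an input register for the output.
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    }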
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca76bc0..2808e1b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -101,10 +101,7 @@
 }
 
 static void RemoveAsUser(HInstruction* instruction) {
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->RemoveAsUserOfInput(i);
-  }
-
+  instruction->RemoveAsUserOfAllInputs();
   RemoveEnvironmentUses(instruction);
 }
 
@@ -757,8 +754,9 @@
 }
 
 static void UpdateInputsUsers(HInstruction* instruction) {
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    instruction->InputAt(i)->AddUseAt(instruction, i);
+  HInputsRef inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs[i]->AddUseAt(instruction, i);
   }
   // Environment should be created later.
   DCHECK(!instruction->HasEnvironment());
@@ -787,22 +785,6 @@
   RemoveInstruction(initial);
 }
 
-void HBasicBlock::MoveInstructionBefore(HInstruction* insn, HInstruction* cursor) {
-  DCHECK(!cursor->IsPhi());
-  DCHECK(!insn->IsPhi());
-  DCHECK(!insn->IsControlFlow());
-  DCHECK(insn->CanBeMoved());
-  DCHECK(!insn->HasSideEffects());
-
-  HBasicBlock* from_block = insn->GetBlock();
-  HBasicBlock* to_block = cursor->GetBlock();
-  DCHECK(from_block != to_block);
-
-  from_block->RemoveInstruction(insn, /* ensure_safety */ false);
-  insn->SetBlock(to_block);
-  to_block->instructions_.InsertInstructionBefore(insn, cursor);
-}
-
 static void Add(HInstructionList* instruction_list,
                 HBasicBlock* block,
                 HInstruction* instruction) {
@@ -1126,9 +1108,10 @@
 void HPhi::RemoveInputAt(size_t index) {
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
-  for (size_t i = index, e = InputCount(); i < e; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -1324,16 +1307,18 @@
   return this == instruction->GetPreviousDisregardingMoves();
 }
 
-bool HInstruction::Equals(HInstruction* other) const {
+bool HInstruction::Equals(const HInstruction* other) const {
   if (!InstructionTypeEquals(other)) return false;
   DCHECK_EQ(GetKind(), other->GetKind());
   if (!InstructionDataEquals(other)) return false;
   if (GetType() != other->GetType()) return false;
-  if (InputCount() != other->InputCount()) return false;
-
-  for (size_t i = 0, e = InputCount(); i < e; ++i) {
-    if (InputAt(i) != other->InputAt(i)) return false;
+  HConstInputsRef inputs = GetInputs();
+  HConstInputsRef other_inputs = other->GetInputs();
+  if (inputs.size() != other_inputs.size()) return false;
+  for (size_t i = 0; i != inputs.size(); ++i) {
+    if (inputs[i] != other_inputs[i]) return false;
   }
+
   DCHECK_EQ(ComputeHashCode(), other->ComputeHashCode());
   return true;
 }
@@ -1351,6 +1336,11 @@
 }
 
 void HInstruction::MoveBefore(HInstruction* cursor) {
+  DCHECK(!IsPhi());
+  DCHECK(!IsControlFlow());
+  DCHECK(CanBeMoved());
+  DCHECK(!cursor->IsPhi());
+
   next_->previous_ = previous_;
   if (previous_ != nullptr) {
     previous_->next_ = next_;
@@ -2390,9 +2380,9 @@
   inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
   input->AddUseAt(this, index);
   // Update indexes in use nodes of inputs that have been pushed further back by the insert().
-  for (size_t i = index + 1u, size = inputs_.size(); i != size; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i - 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  for (size_t i = index + 1u, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i - 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -2400,9 +2390,9 @@
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
   // Update indexes in use nodes of inputs that have been pulled forward by the erase().
-  for (size_t i = index, e = InputCount(); i < e; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -2440,8 +2430,69 @@
   }
 }
 
-bool HLoadString::InstructionDataEquals(HInstruction* other) const {
-  HLoadString* other_load_string = other->AsLoadString();
+bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadClass* other_load_class = other->AsLoadClass();
+  // TODO: To allow GVN for HLoadClass from different dex files, we should compare the type
+  // names rather than type indexes. However, we shall also have to re-think the hash code.
+  if (type_index_ != other_load_class->type_index_ ||
+      GetPackedFields() != other_load_class->GetPackedFields()) {
+    return false;
+  }
+  LoadKind load_kind = GetLoadKind();
+  if (HasAddress(load_kind)) {
+    return GetAddress() == other_load_class->GetAddress();
+  } else if (HasTypeReference(load_kind)) {
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  } else {
+    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
+    // If the type indexes and dex files are the same, dex cache element offsets
+    // must also be the same, so we don't need to compare them.
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  }
+}
+
+void HLoadClass::SetLoadKindInternal(LoadKind load_kind) {
+  // Once sharpened, the load kind should not be changed again.
+  // Also, kReferrersClass should never be overwritten.
+  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  SetPackedField<LoadKindField>(load_kind);
+
+  if (load_kind != LoadKind::kDexCacheViaMethod) {
+    RemoveAsUserOfInput(0u);
+    SetRawInputAt(0u, nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
+  switch (rhs) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      return os << "ReferrersClass";
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      return os << "BootImageLinkTimeAddress";
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
+    case HLoadClass::LoadKind::kBootImageAddress:
+      return os << "BootImageAddress";
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      return os << "DexCacheAddress";
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      return os << "DexCachePcRelative";
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
+    default:
+      LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadString* other_load_string = other->AsLoadString();
+  // TODO: To allow GVN for HLoadString from different dex files, we should compare the strings
+  // rather than their indexes. However, we shall also have to re-think the hash code.
   if (string_index_ != other_load_string->string_index_ ||
       GetPackedFields() != other_load_string->GetPackedFields()) {
     return false;
@@ -2581,4 +2632,23 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
+  switch (kind) {
+    case MemBarrierKind::kAnyStore:
+      return os << "AnyStore";
+    case MemBarrierKind::kLoadAny:
+      return os << "LoadAny";
+    case MemBarrierKind::kStoreStore:
+      return os << "StoreStore";
+    case MemBarrierKind::kAnyAny:
+      return os << "AnyAny";
+    case MemBarrierKind::kNTStoreStore:
+      return os << "NTStoreStore";
+
+    default:
+      LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind);
+      UNREACHABLE();
+  }
+}
+
 }  // namespace art
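
Small usage sketch, not from the patch, of the stream operators added above; the function is a hypothetical helper:

    // The new operators let graph dumps print the sharpened kinds directly.
    static void DumpKindsSketch(std::ostream& os, const HLoadClass* load_class) {
      os << load_class->GetLoadKind();   // e.g. "BootImageAddress"
      os << MemBarrierKind::kAnyAny;     // prints "AnyAny"
    }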
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f3915a2..dfa8276 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,7 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/stl_util.h"
-#include "dex/compiler_enums.h"
+#include "dex_file.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -37,6 +37,7 @@
 #include "primitive.h"
 #include "utils/array_ref.h"
 #include "utils/intrusive_forward_list.h"
+#include "utils/transform_array_ref.h"
 
 namespace art {
 
@@ -84,6 +85,16 @@
 
 static constexpr uint32_t kNoDexPc = -1;
 
+inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
+  // For the purposes of the compiler, the dex files must actually be the same object
+  // if we want to safely treat them as the same. This is especially important for JIT
+  // as custom class loaders can open the same underlying file (or memory) multiple
+  // times and provide different class resolution but no two class loaders should ever
+  // use the same DexFile object - doing so is an unsupported hack that can lead to
+  // all sorts of weird failures.
+  return &lhs == &rhs;
+}
+
 enum IfCondition {
   // All types.
   kCondEQ,  // ==
@@ -160,6 +171,10 @@
 
   static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact);
 
+  static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes());
+  }
+
   static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) {
     return ReferenceTypeInfo(type_handle, is_exact);
   }
@@ -1048,7 +1063,6 @@
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
-  void MoveInstructionBefore(HInstruction* insn, HInstruction* cursor);
   void AddPhi(HPhi* phi);
   void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
   // RemoveInstruction and RemovePhi delete a given instruction from the respective
@@ -1274,7 +1288,8 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
   M(BitwiseNegatedRight, Instruction)                                   \
-  M(MultiplyAccumulate, Instruction)
+  M(MultiplyAccumulate, Instruction)                                    \
+  M(IntermediateAddress, Instruction)
 #endif
 
 #ifndef ART_ENABLE_CODEGEN_arm
@@ -1288,11 +1303,16 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64DataProcWithShifterOp, Instruction)                            \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64DataProcWithShifterOp, Instruction)
 #endif
 
+#ifndef ART_ENABLE_CODEGEN_mips
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
+  M(MipsComputeBaseMethodAddress, Instruction)                          \
+  M(MipsDexCacheArraysBase, Instruction)
+#endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
 
@@ -1333,12 +1353,12 @@
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
-#define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }  \
-  const char* DebugName() const OVERRIDE { return #type; }              \
-  bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
-    return other->Is##type();                                           \
-  }                                                                     \
+#define DECLARE_INSTRUCTION(type)                                         \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }    \
+  const char* DebugName() const OVERRIDE { return #type; }                \
+  bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE {  \
+    return other->Is##type();                                             \
+  }                                                                       \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
 #define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
@@ -1400,6 +1420,21 @@
   typename HUseList<T>::iterator before_use_node_;
 };
 
+// Helper class that extracts the input instruction from HUserRecord<HInstruction*>.
+// This is used for HInstruction::GetInputs() to return a container wrapper providing
+// HInstruction* values even though the underlying container has HUserRecord<>s.
+struct HInputExtractor {
+  HInstruction* operator()(HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+  const HInstruction* operator()(const HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+};
+
+using HInputsRef = TransformArrayRef<HUserRecord<HInstruction*>, HInputExtractor>;
+using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInputExtractor>;
+
 /**
  * Side-effects representation.
  *
@@ -1468,21 +1503,21 @@
   static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset));
+        : SideEffects(TypeFlag(type, kFieldWriteOffset));
   }
 
   static SideEffects ArrayWriteOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset));
+    return SideEffects(TypeFlag(type, kArrayWriteOffset));
   }
 
   static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset));
+        : SideEffects(TypeFlag(type, kFieldReadOffset));
   }
 
   static SideEffects ArrayReadOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset));
+    return SideEffects(TypeFlag(type, kArrayReadOffset));
   }
 
   static SideEffects CanTriggerGC() {
@@ -1609,23 +1644,6 @@
   static constexpr uint64_t kAllReads =
       ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset;
 
-  // Work around the fact that HIR aliases I/F and J/D.
-  // TODO: remove this interceptor once HIR types are clean
-  static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) {
-    switch (type) {
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        return TypeFlag(Primitive::kPrimInt, offset) |
-               TypeFlag(Primitive::kPrimFloat, offset);
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        return TypeFlag(Primitive::kPrimLong, offset) |
-               TypeFlag(Primitive::kPrimDouble, offset);
-      default:
-        return TypeFlag(type, offset);
-    }
-  }
-
   // Translates type to bit flag.
   static uint64_t TypeFlag(Primitive::Type type, int offset) {
     CHECK_NE(type, Primitive::kPrimVoid);
@@ -1798,16 +1816,33 @@
     return IsLoopHeaderPhi() && GetBlock()->GetLoopInformation()->IsIrreducible();
   }
 
-  virtual size_t InputCount() const = 0;
+  virtual ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() = 0;
+
+  ArrayRef<const HUserRecord<HInstruction*>> GetInputRecords() const {
+    // One virtual method is enough, just const_cast<> and then re-add the const.
+    return ArrayRef<const HUserRecord<HInstruction*>>(
+        const_cast<HInstruction*>(this)->GetInputRecords());
+  }
+
+  HInputsRef GetInputs() {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
+  }
+
+  HConstInputsRef GetInputs() const {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
+  }
+
+  size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  void SetRawInputAt(size_t index, HInstruction* input) {
+    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
+  }
+
   virtual void Accept(HGraphVisitor* visitor) = 0;
   virtual const char* DebugName() const = 0;
 
   virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
-  void SetRawInputAt(size_t index, HInstruction* input) {
-    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
-  }
 
   virtual bool NeedsEnvironment() const { return false; }
 
@@ -1872,6 +1907,14 @@
     input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
   }
 
+  void RemoveAsUserOfAllInputs() {
+    for (const HUserRecord<HInstruction*>& input_use : GetInputRecords()) {
+      HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+      input_use.GetInstruction()->uses_.erase_after(before_use_node);
+      input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
+    }
+  }
+
   const HUseList<HInstruction*>& GetUses() const { return uses_; }
   const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
 
@@ -1904,6 +1947,14 @@
     environment_ = environment;
   }
 
+  void InsertRawEnvironment(HEnvironment* environment) {
+    DCHECK(environment_ != nullptr);
+    DCHECK_EQ(environment->GetHolder(), this);
+    DCHECK(environment->GetParent() == nullptr);
+    environment->parent_ = environment_;
+    environment_ = environment;
+  }
+
   void RemoveEnvironment();
 
   // Set the environment of this instruction, copying it from `environment`. While
@@ -1976,21 +2027,21 @@
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
-  virtual bool InstructionTypeEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether any data encoded in the two instructions is equal.
   // This method does not look at the inputs. Both instructions must be
   // of the same type, otherwise the method has undefined behavior.
-  virtual bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether two instructions are equal, that is:
   // 1) They have the same type and contain the same data (InstructionDataEquals).
   // 2) Their inputs are identical.
-  bool Equals(HInstruction* other) const;
+  bool Equals(const HInstruction* other) const;
 
   // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
   // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide
@@ -2001,8 +2052,8 @@
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
-    for (size_t i = 0, e = InputCount(); i < e; ++i) {
-      result = (result * 31) + InputAt(i)->GetId();
+    for (const HInstruction* input : GetInputs()) {
+      result = (result * 31) + input->GetId();
     }
     return result;
   }
@@ -2052,8 +2103,14 @@
   static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1;
   static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
 
-  virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
-  virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const {
+    return GetInputRecords()[i];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = GetInputRecords();
+    input_records[index] = input;
+  }
 
   uint32_t GetPackedFields() const {
     return packed_fields_;
@@ -2174,21 +2231,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs);
 
-class HInputIterator : public ValueObject {
- public:
-  explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {}
-
-  bool Done() const { return index_ == instruction_->InputCount(); }
-  HInstruction* Current() const { return instruction_->InputAt(index_); }
-  void Advance() { index_++; }
-
- private:
-  HInstruction* instruction_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HInputIterator);
-};
-
 class HInstructionIterator : public ValueObject {
  public:
   explicit HInstructionIterator(const HInstructionList& instructions)
@@ -2238,17 +2280,9 @@
       : HInstruction(side_effects, dex_pc), inputs_() {}
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return N; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    DCHECK_LT(i, N);
-    return inputs_[i];
-  }
-
-  void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LT(i, N);
-    inputs_[i] = input;
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
   }
 
  private:
@@ -2266,18 +2300,9 @@
 
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return 0; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-
-  void SetRawInputRecordAt(size_t i ATTRIBUTE_UNUSED,
-                           const HUserRecord<HInstruction*>& input ATTRIBUTE_UNUSED) OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>();
   }
 
  private:
@@ -2309,7 +2334,7 @@
 
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
-class HReturnVoid : public HTemplateInstruction<0> {
+class HReturnVoid FINAL : public HTemplateInstruction<0> {
  public:
   explicit HReturnVoid(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {}
@@ -2324,7 +2349,7 @@
 
 // Represents dex's RETURN opcodes. A HReturn is a control flow
 // instruction that branches to the exit block.
-class HReturn : public HTemplateInstruction<1> {
+class HReturn FINAL : public HTemplateInstruction<1> {
  public:
   explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2339,7 +2364,7 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
-class HPhi : public HInstruction {
+class HPhi FINAL : public HInstruction {
  public:
   HPhi(ArenaAllocator* arena,
        uint32_t reg_number,
@@ -2365,7 +2390,10 @@
 
   bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
 
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
 
   void AddInput(HInstruction* input);
   void RemoveInputAt(size_t index);
@@ -2392,7 +2420,7 @@
   bool IsDead() const { return !IsLive(); }
   bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
 
-  bool IsVRegEquivalentOf(HInstruction* other) const {
+  bool IsVRegEquivalentOf(const HInstruction* other) const {
     return other != nullptr
         && other->IsPhi()
         && other->AsPhi()->GetBlock() == GetBlock()
@@ -2415,15 +2443,6 @@
 
   DECLARE_INSTRUCTION(Phi);
 
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
  private:
   static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
   static constexpr size_t kFieldTypeSize =
@@ -2434,7 +2453,7 @@
   static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
   using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
 
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
+  ArenaVector<HUserRecord<HInstruction*>> inputs_;
   const uint32_t reg_number_;
 
   DISALLOW_COPY_AND_ASSIGN(HPhi);
@@ -2443,7 +2462,7 @@
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
-class HExit : public HTemplateInstruction<0> {
+class HExit FINAL : public HTemplateInstruction<0> {
  public:
   explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2456,7 +2475,7 @@
 };
 
 // Jumps from one block to another.
-class HGoto : public HTemplateInstruction<0> {
+class HGoto FINAL : public HTemplateInstruction<0> {
  public:
   explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2496,9 +2515,9 @@
   DISALLOW_COPY_AND_ASSIGN(HConstant);
 };
 
-class HNullConstant : public HConstant {
+class HNullConstant FINAL : public HConstant {
  public:
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2520,7 +2539,7 @@
 
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HConstant {
+class HIntConstant FINAL : public HConstant {
  public:
   int32_t GetValue() const { return value_; }
 
@@ -2528,7 +2547,7 @@
     return static_cast<uint64_t>(static_cast<uint32_t>(value_));
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsIntConstant()) << other->DebugName();
     return other->AsIntConstant()->value_ == value_;
   }
@@ -2561,13 +2580,13 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HConstant {
+class HLongConstant FINAL : public HConstant {
  public:
   int64_t GetValue() const { return value_; }
 
   uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsLongConstant()) << other->DebugName();
     return other->AsLongConstant()->value_ == value_;
   }
@@ -2591,7 +2610,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
-class HFloatConstant : public HConstant {
+class HFloatConstant FINAL : public HConstant {
  public:
   float GetValue() const { return value_; }
 
@@ -2599,7 +2618,7 @@
     return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsFloatConstant()) << other->DebugName();
     return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
@@ -2644,13 +2663,13 @@
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
-class HDoubleConstant : public HConstant {
+class HDoubleConstant FINAL : public HConstant {
  public:
   double GetValue() const { return value_; }
 
   uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsDoubleConstant()) << other->DebugName();
     return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
@@ -2697,7 +2716,7 @@
 
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
-class HIf : public HTemplateInstruction<1> {
+class HIf FINAL : public HTemplateInstruction<1> {
  public:
   explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2726,7 +2745,7 @@
 // non-exceptional control flow.
 // Normal-flow successor is stored at index zero, exception handlers under
 // higher indices in no particular order.
-class HTryBoundary : public HTemplateInstruction<0> {
+class HTryBoundary FINAL : public HTemplateInstruction<0> {
  public:
   enum class BoundaryKind {
     kEntry,
@@ -2784,7 +2803,7 @@
 };
 
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HTemplateInstruction<1> {
  public:
   // We set CanTriggerGC to prevent any intermediate address to be live
   // at the point of the `HDeoptimize`.
@@ -2794,7 +2813,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool NeedsEnvironment() const OVERRIDE { return true; }
@@ -2809,7 +2828,7 @@
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
-class HCurrentMethod : public HExpression<0> {
+class HCurrentMethod FINAL : public HExpression<0> {
  public:
   explicit HCurrentMethod(Primitive::Type type, uint32_t dex_pc = kNoDexPc)
       : HExpression(type, SideEffects::None(), dex_pc) {}
@@ -2822,7 +2841,7 @@
 
 // Fetches an ArtMethod from the virtual table or the interface method table
 // of a class.
-class HClassTableGet : public HExpression<1> {
+class HClassTableGet FINAL : public HExpression<1> {
  public:
   enum class TableKind {
     kVTable,
@@ -2841,7 +2860,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return other->AsClassTableGet()->GetIndex() == index_ &&
         other->AsClassTableGet()->GetPackedFields() == GetPackedFields();
   }
@@ -2869,7 +2888,7 @@
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
-class HPackedSwitch : public HTemplateInstruction<1> {
+class HPackedSwitch FINAL : public HTemplateInstruction<1> {
  public:
   HPackedSwitch(int32_t start_value,
                 uint32_t num_entries,
@@ -2911,7 +2930,7 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2983,7 +3002,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -3056,7 +3075,7 @@
   ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
   void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return GetPackedFields() == other->AsCondition()->GetPackedFields();
   }
 
@@ -3114,7 +3133,7 @@
 };
 
 // Instruction to check if two inputs are equal to each other.
-class HEqual : public HCondition {
+class HEqual FINAL : public HCondition {
  public:
   HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3153,12 +3172,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x == y; }
+  template <typename T> static bool Compute(T x, T y) { return x == y; }
 
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
 
-class HNotEqual : public HCondition {
+class HNotEqual FINAL : public HCondition {
  public:
   HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3196,12 +3215,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x != y; }
+  template <typename T> static bool Compute(T x, T y) { return x != y; }
 
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
 
-class HLessThan : public HCondition {
+class HLessThan FINAL : public HCondition {
  public:
   HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3233,12 +3252,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> static bool Compute(T x, T y) { return x < y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
 
-class HLessThanOrEqual : public HCondition {
+class HLessThanOrEqual FINAL : public HCondition {
  public:
   HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3270,12 +3289,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> static bool Compute(T x, T y) { return x <= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
 
-class HGreaterThan : public HCondition {
+class HGreaterThan FINAL : public HCondition {
  public:
   HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3307,12 +3326,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> static bool Compute(T x, T y) { return x > y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
 
-class HGreaterThanOrEqual : public HCondition {
+class HGreaterThanOrEqual FINAL : public HCondition {
  public:
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3344,12 +3363,12 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> static bool Compute(T x, T y) { return x >= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
 
-class HBelow : public HCondition {
+class HBelow FINAL : public HCondition {
  public:
   HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3382,14 +3401,14 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) < MakeUnsigned(y);
   }
 
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
 
-class HBelowOrEqual : public HCondition {
+class HBelowOrEqual FINAL : public HCondition {
  public:
   HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3422,14 +3441,14 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) <= MakeUnsigned(y);
   }
 
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
 
-class HAbove : public HCondition {
+class HAbove FINAL : public HCondition {
  public:
   HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3462,14 +3481,14 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) > MakeUnsigned(y);
   }
 
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
 
-class HAboveOrEqual : public HCondition {
+class HAboveOrEqual FINAL : public HCondition {
  public:
   HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3502,7 +3521,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) >= MakeUnsigned(y);
   }
 
@@ -3511,7 +3530,7 @@
 
 // Instruction to check how two inputs compare to each other.
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
-class HCompare : public HBinaryOperation {
+class HCompare FINAL : public HBinaryOperation {
  public:
   // Note that `comparison_type` is the type of comparison performed
   // between the comparison's inputs, not the type of the instantiated
@@ -3560,7 +3579,7 @@
     return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return GetPackedFields() == other->AsCompare()->GetPackedFields();
   }
 
@@ -3600,21 +3619,21 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-class HNewInstance : public HExpression<2> {
+class HNewInstance FINAL : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
                HCurrentMethod* current_method,
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
-               bool can_throw,
+               bool needs_access_check,
                bool finalizable,
                QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
         entrypoint_(entrypoint) {
-    SetPackedFlag<kFlagCanThrow>(can_throw);
+    SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagFinalizable>(finalizable);
     SetRawInputAt(0, cls);
     SetRawInputAt(1, current_method);
@@ -3626,10 +3645,11 @@
   // Calls runtime so needs an environment.
   bool NeedsEnvironment() const OVERRIDE { return true; }
 
-  // It may throw when called on type that's not instantiable/accessible.
-  // It can throw OOME.
-  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
-  bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>() || true; }
+  // Can throw an error when out of memory or when the class is not instantiable/accessible.
+  bool CanThrow() const OVERRIDE { return true; }
+
+  // Needs to call into runtime to make sure it's instantiable/accessible.
+  bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
 
   bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); }
 
@@ -3646,8 +3666,8 @@
   DECLARE_INSTRUCTION(NewInstance);
 
  private:
-  static constexpr size_t kFlagCanThrow = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFlagFinalizable = kFlagCanThrow + 1;
+  static constexpr size_t kFlagNeedsAccessCheck = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagFinalizable = kFlagNeedsAccessCheck + 1;
   static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1;
   static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits,
                 "Too many packed fields.");
@@ -3689,10 +3709,13 @@
 
 class HInvoke : public HInstruction {
  public:
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
   bool NeedsEnvironment() const OVERRIDE;
 
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
+
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
   }
@@ -3729,7 +3752,7 @@
 
   bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
   }
 
@@ -3780,14 +3803,6 @@
     SetPackedFlag<kFlagCanThrow>(true);
   }
 
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
   void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
 
   uint32_t number_of_arguments_;
@@ -3802,7 +3817,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
 };
 
-class HInvokeUnresolved : public HInvoke {
+class HInvokeUnresolved FINAL : public HInvoke {
  public:
   HInvokeUnresolved(ArenaAllocator* arena,
                     uint32_t number_of_arguments,
@@ -3825,7 +3840,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeUnresolved);
 };
 
-class HInvokeStaticOrDirect : public HInvoke {
+class HInvokeStaticOrDirect FINAL : public HInvoke {
  public:
   // Requirements of this method call regarding the class
   // initialization (clinit) check of its declaring class.
@@ -3954,6 +3969,25 @@
     InsertInputAt(GetSpecialInputIndex(), input);
   }
 
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
+    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) {
+      DCHECK(!input_records.empty());
+      DCHECK_GT(input_records.size(), GetNumberOfArguments());
+      HInstruction* last_input = input_records.back().GetInstruction();
+      // Note: `last_input` may be null during arguments setup.
+      if (last_input != nullptr) {
+        // `last_input` is the last input of a static invoke marked as having
+        // an explicit clinit check. It must either be:
+        // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
+        // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+        DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+      }
+    }
+    return input_records;
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
     // TODO: for intrinsics we can generate implicit null checks.
@@ -4038,8 +4072,8 @@
   // instruction; only relevant for static calls with explicit clinit check.
   void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
     DCHECK(IsStaticWithExplicitClinitCheck());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
+    size_t last_input_index = inputs_.size() - 1u;
+    HInstruction* last_input = inputs_.back().GetInstruction();
     DCHECK(last_input != nullptr);
     DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
@@ -4068,20 +4102,6 @@
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i);
-    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) {
-      HInstruction* input = input_record.GetInstruction();
-      // `input` is the last input of a static invoke marked as having
-      // an explicit clinit check. It must either be:
-      // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
-      // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
-      DCHECK(input != nullptr);
-      DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName();
-    }
-    return input_record;
-  }
-
   void InsertInputAt(size_t index, HInstruction* input);
   void RemoveInputAt(size_t index);
 
@@ -4114,7 +4134,7 @@
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
-class HInvokeVirtual : public HInvoke {
+class HInvokeVirtual FINAL : public HInvoke {
  public:
   HInvokeVirtual(ArenaAllocator* arena,
                  uint32_t number_of_arguments,
@@ -4140,7 +4160,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
 };
 
-class HInvokeInterface : public HInvoke {
+class HInvokeInterface FINAL : public HInvoke {
  public:
   HInvokeInterface(ArenaAllocator* arena,
                    uint32_t number_of_arguments,
@@ -4167,14 +4187,14 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNeg : public HUnaryOperation {
+class HNeg FINAL : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {
     DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType()));
   }
 
-  template <typename T> T Compute(T x) const { return -x; }
+  template <typename T> static T Compute(T x) { return -x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -4195,7 +4215,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNeg);
 };
 
-class HNewArray : public HExpression<2> {
+class HNewArray FINAL : public HExpression<2> {
  public:
   HNewArray(HInstruction* length,
             HCurrentMethod* current_method,
@@ -4234,7 +4254,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNewArray);
 };
 
-class HAdd : public HBinaryOperation {
+class HAdd FINAL : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type,
        HInstruction* left,
@@ -4244,7 +4264,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x + y; }
+  template <typename T> static T Compute(T x, T y) { return x + y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4269,7 +4289,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAdd);
 };
 
-class HSub : public HBinaryOperation {
+class HSub FINAL : public HBinaryOperation {
  public:
   HSub(Primitive::Type result_type,
        HInstruction* left,
@@ -4277,7 +4297,7 @@
        uint32_t dex_pc = kNoDexPc)
       : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
 
-  template <typename T> T Compute(T x, T y) const { return x - y; }
+  template <typename T> static T Compute(T x, T y) { return x - y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4302,7 +4322,7 @@
   DISALLOW_COPY_AND_ASSIGN(HSub);
 };
 
-class HMul : public HBinaryOperation {
+class HMul FINAL : public HBinaryOperation {
  public:
   HMul(Primitive::Type result_type,
        HInstruction* left,
@@ -4312,7 +4332,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x * y; }
+  template <typename T> static T Compute(T x, T y) { return x * y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4337,7 +4357,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMul);
 };
 
-class HDiv : public HBinaryOperation {
+class HDiv FINAL : public HBinaryOperation {
  public:
   HDiv(Primitive::Type result_type,
        HInstruction* left,
@@ -4389,7 +4409,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
-class HRem : public HBinaryOperation {
+class HRem FINAL : public HBinaryOperation {
  public:
   HRem(Primitive::Type result_type,
        HInstruction* left,
@@ -4440,7 +4460,7 @@
   DISALLOW_COPY_AND_ASSIGN(HRem);
 };
 
-class HDivZeroCheck : public HExpression<1> {
+class HDivZeroCheck FINAL : public HExpression<1> {
  public:
   // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
   // constructor.
@@ -4453,7 +4473,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4466,7 +4486,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
-class HShl : public HBinaryOperation {
+class HShl FINAL : public HBinaryOperation {
  public:
   HShl(Primitive::Type result_type,
        HInstruction* value,
@@ -4478,7 +4498,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     return value << (distance & max_shift_distance);
   }
 
@@ -4512,7 +4532,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShl);
 };
 
-class HShr : public HBinaryOperation {
+class HShr FINAL : public HBinaryOperation {
  public:
   HShr(Primitive::Type result_type,
        HInstruction* value,
@@ -4524,7 +4544,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     return value >> (distance & max_shift_distance);
   }
 
@@ -4558,7 +4578,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShr);
 };
 
-class HUShr : public HBinaryOperation {
+class HUShr FINAL : public HBinaryOperation {
  public:
   HUShr(Primitive::Type result_type,
         HInstruction* value,
@@ -4570,7 +4590,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     typedef typename std::make_unsigned<T>::type V;
     V ux = static_cast<V>(value);
     return static_cast<T>(ux >> (distance & max_shift_distance));
@@ -4606,7 +4626,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUShr);
 };
 
-class HAnd : public HBinaryOperation {
+class HAnd FINAL : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type,
        HInstruction* left,
@@ -4616,7 +4636,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x & y; }
+  template <typename T> static T Compute(T x, T y) { return x & y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4643,7 +4663,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAnd);
 };
 
-class HOr : public HBinaryOperation {
+class HOr FINAL : public HBinaryOperation {
  public:
   HOr(Primitive::Type result_type,
       HInstruction* left,
@@ -4653,7 +4673,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x | y; }
+  template <typename T> static T Compute(T x, T y) { return x | y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4680,7 +4700,7 @@
   DISALLOW_COPY_AND_ASSIGN(HOr);
 };
 
-class HXor : public HBinaryOperation {
+class HXor FINAL : public HBinaryOperation {
  public:
   HXor(Primitive::Type result_type,
        HInstruction* left,
@@ -4690,7 +4710,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x ^ y; }
+  template <typename T> static T Compute(T x, T y) { return x ^ y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4717,7 +4737,7 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
-class HRor : public HBinaryOperation {
+class HRor FINAL : public HBinaryOperation {
  public:
   HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
     : HBinaryOperation(result_type, value, distance) {
@@ -4726,7 +4746,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_value) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_value) {
     typedef typename std::make_unsigned<T>::type V;
     V ux = static_cast<V>(value);
     if ((distance & max_shift_value) == 0) {
@@ -4770,7 +4790,7 @@
 
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
-class HParameterValue : public HExpression<0> {
+class HParameterValue FINAL : public HExpression<0> {
  public:
   HParameterValue(const DexFile& dex_file,
                   uint16_t type_index,
@@ -4812,17 +4832,17 @@
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
-class HNot : public HUnaryOperation {
+class HNot FINAL : public HUnaryOperation {
  public:
   HNot(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  template <typename T> T Compute(T x) const { return ~x; }
+  template <typename T> static T Compute(T x) { return ~x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -4845,17 +4865,17 @@
   DISALLOW_COPY_AND_ASSIGN(HNot);
 };
 
-class HBooleanNot : public HUnaryOperation {
+class HBooleanNot FINAL : public HUnaryOperation {
  public:
   explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
-  template <typename T> bool Compute(T x) const {
+  template <typename T> static bool Compute(T x) {
     DCHECK(IsUint<1>(x)) << x;
     return !x;
   }
@@ -4882,7 +4902,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBooleanNot);
 };
 
-class HTypeConversion : public HExpression<1> {
+class HTypeConversion FINAL : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
   HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
@@ -4899,7 +4919,9 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
 
   // Try to statically evaluate the conversion and return a HConstant
   // containing the result.  If the input cannot be converted, return nullptr.
@@ -4925,7 +4947,7 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HNullCheck : public HExpression<1> {
+class HNullCheck FINAL : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
   // constructor.
@@ -4935,7 +4957,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4987,7 +5009,7 @@
   const Handle<mirror::DexCache> dex_cache_;
 };
 
-class HInstanceFieldGet : public HExpression<1> {
+class HInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HInstanceFieldGet(HInstruction* value,
                     Primitive::Type field_type,
@@ -5013,13 +5035,13 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   size_t ComputeHashCode() const OVERRIDE {
@@ -5039,7 +5061,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet);
 };
 
-class HInstanceFieldSet : public HTemplateInstruction<2> {
+class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HInstanceFieldSet(HInstruction* object,
                     HInstruction* value,
@@ -5066,7 +5088,7 @@
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   const FieldInfo& GetFieldInfo() const { return field_info_; }
@@ -5090,22 +5112,21 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet);
 };
 
-class HArrayGet : public HExpression<2> {
+class HArrayGet FINAL : public HExpression<2> {
  public:
   HArrayGet(HInstruction* array,
             HInstruction* index,
             Primitive::Type type,
             uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HExpression(type,
-                    SideEffects::ArrayReadOfType(type).Union(additional_side_effects),
-                    dex_pc) {
+            bool is_string_char_at = false)
+      : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
+    SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -5133,28 +5154,35 @@
     return result;
   }
 
+  bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); }
+
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
 
   DECLARE_INSTRUCTION(ArrayGet);
 
  private:
+  // We treat a String as an array, creating the HArrayGet from String.charAt()
+  // intrinsic in the instruction simplifier. We can always determine whether
+  // a particular HArrayGet is actually a String.charAt() by looking at the type
+  // of the input, but that requires holding the mutator lock, so we prefer to use
+  // a flag so that code generators don't need to do the locking.
+  static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1;
+  static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayGet);
 };
 
-class HArraySet : public HTemplateInstruction<3> {
+class HArraySet FINAL : public HTemplateInstruction<3> {
  public:
   HArraySet(HInstruction* array,
             HInstruction* index,
             HInstruction* value,
             Primitive::Type expected_component_type,
-            uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HTemplateInstruction(
-            SideEffects::ArrayWriteOfType(expected_component_type).Union(
-                SideEffectsForArchRuntimeCalls(value->GetType())).Union(
-                    additional_side_effects),
-            dex_pc) {
+            uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetPackedField<ExpectedComponentTypeField>(expected_component_type);
     SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == Primitive::kPrimNot);
     SetPackedFlag<kFlagValueCanBeNull>(true);
@@ -5162,6 +5190,8 @@
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
+    // Make a best guess now, may be refined during SSA building.
+    ComputeSideEffects();
   }
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5214,6 +5244,12 @@
     return GetPackedField<ExpectedComponentTypeField>();
   }
 
+  void ComputeSideEffects() {
+    Primitive::Type type = GetComponentType();
+    SetSideEffects(SideEffects::ArrayWriteOfType(type).Union(
+        SideEffectsForArchRuntimeCalls(type)));
+  }
+
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) {
     return (value_type == Primitive::kPrimNot) ? SideEffects::CanTriggerGC() : SideEffects::None();
   }
@@ -5239,42 +5275,59 @@
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
 
-class HArrayLength : public HExpression<1> {
+class HArrayLength FINAL : public HExpression<1> {
  public:
-  HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+    SetPackedFlag<kFlagIsStringLength>(is_string_length);
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
     SetRawInputAt(0, array);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     return obj == InputAt(0);
   }
 
+  bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); }
+
   DECLARE_INSTRUCTION(ArrayLength);
 
  private:
+  // We treat a String as an array, creating the HArrayLength from String.length()
+  // or String.isEmpty() intrinsic in the instruction simplifier. We can always
+  // determine whether a particular HArrayLength is actually a String.length() by
+  // looking at the type of the input, but that requires holding the mutator lock, so
+  // we prefer to use a flag so that code generators don't need to do the locking.
+  static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1;
+  static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayLength);
 };
 
-class HBoundsCheck : public HExpression<2> {
+class HBoundsCheck FINAL : public HExpression<2> {
  public:
   // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
   // constructor.
-  HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
-      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
+  HBoundsCheck(HInstruction* index,
+               HInstruction* length,
+               uint32_t dex_pc,
+               uint32_t string_char_at_method_index = DexFile::kDexNoIndex)
+      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc),
+        string_char_at_method_index_(string_char_at_method_index) {
     DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -5282,15 +5335,27 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  bool IsStringCharAt() const { return GetStringCharAtMethodIndex() != DexFile::kDexNoIndex; }
+  uint32_t GetStringCharAtMethodIndex() const { return string_char_at_method_index_; }
+
   HInstruction* GetIndex() const { return InputAt(0); }
 
   DECLARE_INSTRUCTION(BoundsCheck);
 
  private:
+  // We treat a String as an array, creating the HBoundsCheck from String.charAt()
+  // intrinsic in the instruction simplifier. We want to include the String.charAt() call
+  // in the stack trace if we actually throw the StringIndexOutOfBoundsException,
+  // so we need to create an HEnvironment which will be translated to an InlineInfo
+  // indicating the extra stack frame. Since we add this HEnvironment quite late,
+  // in the PrepareForRegisterAllocation pass, we need to remember the method index
+  // from the invoke as we don't want to look again at the dex bytecode.
+  uint32_t string_char_at_method_index_;  // DexFile::kDexNoIndex if regular array.
+
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
 
-class HSuspendCheck : public HTemplateInstruction<0> {
+class HSuspendCheck FINAL : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
@@ -5332,8 +5397,44 @@
 /**
  * Instruction to load a Class object.
  */
-class HLoadClass : public HExpression<1> {
+class HLoadClass FINAL : public HInstruction {
  public:
+  // Determines how to load the Class.
+  enum class LoadKind {
+    // Use the Class* from the method's own ArtMethod*.
+    kReferrersClass,
+
+    // Use boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in non-PIC mode.
+    kBootImageLinkTimeAddress,
+
+    // Use PC-relative boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in PIC mode.
+    kBootImageLinkTimePcRelative,
+
+    // Use a known boot image Class* address, embedded in the code by the codegen.
+    // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
+    // Note: codegen needs to emit a linker patch if indicated by compiler options'
+    // GetIncludePatchInformation().
+    kBootImageAddress,
+
+    // Load from the resolved types array at an absolute address.
+    // Used for classes outside the boot image referenced by JIT-compiled code.
+    kDexCacheAddress,
+
+    // Load from resolved types array in the dex cache using a PC-relative load.
+    // Used for classes outside boot image when we know that we can access
+    // the dex cache arrays using a PC-relative load.
+    kDexCachePcRelative,
+
+    // Load from resolved types array accessed through the class loaded from
+    // the compiled method's own ArtMethod*. This is the default access type when
+    // all other types are unavailable.
+    kDexCacheViaMethod,
+
+    kLast = kDexCacheViaMethod
+  };
+
   HLoadClass(HCurrentMethod* current_method,
              uint16_t type_index,
              const DexFile& dex_file,
@@ -5341,7 +5442,8 @@
              uint32_t dex_pc,
              bool needs_access_check,
              bool is_in_dex_cache)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
@@ -5349,26 +5451,47 @@
     // methods so we can't possibly end up in this situation.
     DCHECK(!is_referrers_class || !needs_access_check);
 
-    SetPackedFlag<kFlagIsReferrersClass>(is_referrers_class);
+    SetPackedField<LoadKindField>(
+        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
-    SetRawInputAt(0, current_method);
+  }
+
+  void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
+    DCHECK(HasAddress(load_kind));
+    load_data_.address = address;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithTypeReference(LoadKind load_kind,
+                                    const DexFile& dex_file,
+                                    uint32_t type_index) {
+    DCHECK(HasTypeReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    DCHECK_EQ(type_index_, type_index);
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
+                                        const DexFile& dex_file,
+                                        uint32_t element_index) {
+    DCHECK(HasDexCacheReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    load_data_.dex_cache_element_index = element_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  LoadKind GetLoadKind() const {
+    return GetPackedField<LoadKindField>();
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    // Note that we don't need to test for generate_clinit_check_.
-    // Whether or not we need to generate the clinit check is processed in
-    // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
-    return other->AsLoadClass()->type_index_ == type_index_ &&
-        other->AsLoadClass()->GetPackedFields() == GetPackedFields();
-  }
+  bool InstructionDataEquals(const HInstruction* other) const;
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
 
-  uint16_t GetTypeIndex() const { return type_index_; }
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5403,7 +5526,15 @@
     loaded_class_rti_ = rti;
   }
 
-  const DexFile& GetDexFile() { return dex_file_; }
+  uint32_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  uint32_t GetDexCacheElementOffset() const;
+
+  uint64_t GetAddress() const {
+    DCHECK(HasAddress(GetLoadKind()));
+    return load_data_.address;
+  }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !IsReferrersClass(); }
 
@@ -5411,32 +5542,101 @@
     return SideEffects::CanTriggerGC();
   }
 
-  bool IsReferrersClass() const { return GetPackedFlag<kFlagIsReferrersClass>(); }
+  bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; }
   bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
   bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
   bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
 
+  void MarkInDexCache() {
+    SetPackedFlag<kFlagIsInDexCache>(true);
+    DCHECK(!NeedsEnvironment());
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+  }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
-  static constexpr size_t kFlagIsReferrersClass    = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFlagNeedsAccessCheck    = kFlagIsReferrersClass + 1;
+  static constexpr size_t kFlagNeedsAccessCheck    = kNumberOfGenericPackedBits;
   static constexpr size_t kFlagIsInDexCache        = kFlagNeedsAccessCheck + 1;
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1;
-  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagGenerateClInitCheck + 1;
+  static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
+  static constexpr size_t kFieldLoadKindSize =
+      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
   static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
+  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
+
+  static bool HasTypeReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kDexCacheViaMethod ||
+        load_kind == LoadKind::kReferrersClass;
+  }
+
+  static bool HasAddress(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+  }
+
+  static bool HasDexCacheReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kDexCachePcRelative;
+  }
+
+  void SetLoadKindInternal(LoadKind load_kind);
+
+  // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  HUserRecord<HInstruction*> special_input_;
 
   const uint16_t type_index_;
   const DexFile& dex_file_;
 
+  union {
+    uint32_t dex_cache_element_index;   // Only for dex cache reference.
+    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+  } load_data_;
+
   ReferenceTypeInfo loaded_class_rti_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
-class HLoadString : public HExpression<1> {
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline uint32_t HLoadClass::GetDexCacheElementOffset() const {
+  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
+  return load_data_.dex_cache_element_index;
+}
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
+  DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
+  special_input->AddUseAt(this, 0);
+}
+
+class HLoadString FINAL : public HInstruction {
  public:
   // Determines how to load the String.
   enum class LoadKind {
@@ -5475,12 +5675,12 @@
               uint32_t string_index,
               const DexFile& dex_file,
               uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         string_index_(string_index) {
     SetPackedFlag<kFlagIsInDexCache>(false);
     SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
     load_data_.ref.dex_file = &dex_file;
-    SetRawInputAt(0, current_method);
   }
 
   void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
@@ -5527,7 +5727,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
   size_t ComputeHashCode() const OVERRIDE { return string_index_; }
 
@@ -5563,16 +5763,22 @@
     SetSideEffects(SideEffects::None());
   }
 
-  size_t InputCount() const OVERRIDE {
-    return (InputAt(0) != nullptr) ? 1u : 0u;
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
   }
 
-  void AddSpecialInput(HInstruction* special_input);
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
 
   DECLARE_INSTRUCTION(LoadString);
 
  private:
-  static constexpr size_t kFlagIsInDexCache = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagIsInDexCache = kNumberOfGenericPackedBits;
   static constexpr size_t kFieldLoadKind = kFlagIsInDexCache + 1;
   static constexpr size_t kFieldLoadKindSize =
       MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
@@ -5596,6 +5802,11 @@
 
   void SetLoadKindInternal(LoadKind load_kind);
 
+  // The special input is the HCurrentMethod for kDexCacheViaMethod.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  HUserRecord<HInstruction*> special_input_;
+
   // String index serves also as the hash code and it's also needed for slow-paths,
   // so it must not be overwritten with other load data.
   uint32_t string_index_;
@@ -5627,18 +5838,23 @@
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
-  // The special input is used for PC-relative loads on some architectures.
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
-  DCHECK(InputAt(0) == nullptr);
-  SetRawInputAt(0u, special_input);
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+  // HLoadString::GetInputRecords() returns an empty array at this point,
+  // so use the GetInputRecords() from the base class to set the input record.
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
   special_input->AddUseAt(this, 0);
 }
 
 /**
  * Performs an initialization check on its Class object input.
  */
-class HClinitCheck : public HExpression<1> {
+class HClinitCheck FINAL : public HExpression<1> {
  public:
   HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
       : HExpression(
@@ -5649,7 +5865,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -5668,7 +5884,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClinitCheck);
 };
 
-class HStaticFieldGet : public HExpression<1> {
+class HStaticFieldGet FINAL : public HExpression<1> {
  public:
   HStaticFieldGet(HInstruction* cls,
                   Primitive::Type field_type,
@@ -5695,8 +5911,8 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HStaticFieldGet* other_get = other->AsStaticFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HStaticFieldGet* other_get = other->AsStaticFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
@@ -5717,7 +5933,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldGet);
 };
 
-class HStaticFieldSet : public HTemplateInstruction<2> {
+class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HStaticFieldSet(HInstruction* cls,
                   HInstruction* value,
@@ -5765,7 +5981,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet);
 };
 
-class HUnresolvedInstanceFieldGet : public HExpression<1> {
+class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HUnresolvedInstanceFieldGet(HInstruction* obj,
                               Primitive::Type field_type,
@@ -5790,7 +6006,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet);
 };
 
-class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> {
+class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HUnresolvedInstanceFieldSet(HInstruction* obj,
                               HInstruction* value,
@@ -5828,7 +6044,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet);
 };
 
-class HUnresolvedStaticFieldGet : public HExpression<0> {
+class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
  public:
   HUnresolvedStaticFieldGet(Primitive::Type field_type,
                             uint32_t field_index,
@@ -5851,7 +6067,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet);
 };
 
-class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> {
+class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
  public:
   HUnresolvedStaticFieldSet(HInstruction* value,
                             Primitive::Type field_type,
@@ -5888,7 +6104,7 @@
 };
 
 // Implement the move-exception DEX instruction.
-class HLoadException : public HExpression<0> {
+class HLoadException FINAL : public HExpression<0> {
  public:
   explicit HLoadException(uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc) {}
@@ -5903,7 +6119,7 @@
 
 // Implicit part of move-exception which clears thread-local exception storage.
 // Must not be removed because the runtime expects the TLS to get cleared.
-class HClearException : public HTemplateInstruction<0> {
+class HClearException FINAL : public HTemplateInstruction<0> {
  public:
   explicit HClearException(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {}
@@ -5914,7 +6130,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClearException);
 };
 
-class HThrow : public HTemplateInstruction<1> {
+class HThrow FINAL : public HTemplateInstruction<1> {
  public:
   HThrow(HInstruction* exception, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
@@ -5951,7 +6167,7 @@
 
 std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
 
-class HInstanceOf : public HExpression<2> {
+class HInstanceOf FINAL : public HExpression<2> {
  public:
   HInstanceOf(HInstruction* object,
               HLoadClass* constant,
@@ -5968,7 +6184,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -6005,7 +6221,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
 };
 
-class HBoundType : public HExpression<1> {
+class HBoundType FINAL : public HExpression<1> {
  public:
   HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
@@ -6049,7 +6265,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
 };
 
-class HCheckCast : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTemplateInstruction<2> {
  public:
   HCheckCast(HInstruction* object,
              HLoadClass* constant,
@@ -6064,7 +6280,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -6094,7 +6310,33 @@
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
-class HMemoryBarrier : public HTemplateInstruction<0> {
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
+ * primitives.  Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ *    class has final fields.
+ * -# Use NTStoreStore to order non-temporal stores with respect to all later
+ *    store-to-memory instructions.  Only generated together with non-temporal stores.
+ */
+enum MemBarrierKind {
+  kAnyStore,
+  kLoadAny,
+  kStoreStore,
+  kAnyAny,
+  kNTStoreStore,
+  kLastBarrierKind = kNTStoreStore
+};
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
+
+class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(
@@ -6119,7 +6361,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
-class HMonitorOperation : public HTemplateInstruction<1> {
+class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
     kEnter,
@@ -6164,7 +6406,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-class HSelect : public HExpression<3> {
+class HSelect FINAL : public HExpression<3> {
  public:
   HSelect(HInstruction* condition,
           HInstruction* true_value,
@@ -6187,7 +6429,9 @@
   HInstruction* GetCondition() const { return InputAt(2); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
 
   bool CanBeNull() const OVERRIDE {
     return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
@@ -6277,7 +6521,7 @@
 
 static constexpr size_t kDefaultNumberOfMoves = 4;
 
-class HParallelMove : public HTemplateInstruction<0> {
+class HParallelMove FINAL : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc),
@@ -6344,6 +6588,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "nodes_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "nodes_x86.h"
 #endif
@@ -6563,16 +6810,6 @@
   }
 }
 
-inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
-  // For the purposes of the compiler, the dex files must actually be the same object
-  // if we want to safely treat them as the same. This is especially important for JIT
-  // as custom class loaders can open the same underlying file (or memory) multiple
-  // times and provide different class resolution but no two class loaders should ever
-  // use the same DexFile object - doing so is an unsupported hack that can lead to
-  // all sorts of weird failures.
-  return &lhs == &rhs;
-}
-
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
   inline const H##type* HInstruction::As##type() const {                       \
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
index 6a1dbb9..d9f9740e 100644
--- a/compiler/optimizing/nodes_arm.h
+++ b/compiler/optimizing/nodes_arm.h
@@ -19,13 +19,15 @@
 
 namespace art {
 
-class HArmDexCacheArraysBase : public HExpression<0> {
+class HArmDexCacheArraysBase FINAL : public HExpression<0> {
  public:
   explicit HArmDexCacheArraysBase(const DexFile& dex_file)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
         dex_file_(&dex_file),
         element_offset_(static_cast<size_t>(-1)) { }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+
   void UpdateElementOffset(size_t element_offset) {
     // Use the lowest offset from the requested elements so that all offsets from
     // this base are non-negative because our assemblers emit negative-offset loads
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 173852a..3f88717 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-class HArm64DataProcWithShifterOp : public HExpression<2> {
+class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
  public:
   enum OpKind {
     kLSL,   // Logical shift left.
@@ -56,8 +56,8 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
-    HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+  bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+    const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
     return instr_kind_ == other->instr_kind_ &&
         op_kind_ == other->op_kind_ &&
         shift_amount_ == other->shift_amount_;
@@ -94,30 +94,6 @@
 
 std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
 
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress : public HExpression<2> {
- public:
-  HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
-    SetRawInputAt(0, base_address);
-    SetRawInputAt(1, offset);
-  }
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
-  bool IsActualObject() const OVERRIDE { return false; }
-
-  HInstruction* GetBaseAddress() const { return InputAt(0); }
-  HInstruction* GetOffset() const { return InputAt(1); }
-
-  DECLARE_INSTRUCTION(Arm64IntermediateAddress);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
new file mode 100644
index 0000000..de77245
--- /dev/null
+++ b/compiler/optimizing/nodes_mips.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+
+namespace art {
+
+// Compute the address of the method for MIPS Constant area support.
+class HMipsComputeBaseMethodAddress : public HExpression<0> {
+ public:
+  // Treat the value as an int32_t, but it is really a 32 bit native pointer.
+  HMipsComputeBaseMethodAddress()
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress);
+};
+
+class HMipsDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HMipsDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // We'll maximize the range of a single load instruction for dex cache array accesses
+    // by aligning offset -32768 with the offset of the first used element.
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(MipsDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c10c718..8bd8667 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -19,7 +19,7 @@
 
 namespace art {
 
-class HMultiplyAccumulate : public HExpression<3> {
+class HMultiplyAccumulate FINAL : public HExpression<3> {
  public:
   HMultiplyAccumulate(Primitive::Type type,
                       InstructionKind op,
@@ -38,7 +38,7 @@
   static constexpr int kInputMulRightIndex = 2;
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
   }
 
@@ -53,7 +53,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
 };
 
-class HBitwiseNegatedRight : public HBinaryOperation {
+class HBitwiseNegatedRight FINAL : public HBinaryOperation {
  public:
   HBitwiseNegatedRight(Primitive::Type result_type,
                             InstructionKind op,
@@ -113,6 +113,34 @@
   DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
 };
 
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+  HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+    SetRawInputAt(0, base_address);
+    SetRawInputAt(1, offset);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+  bool IsActualObject() const OVERRIDE { return false; }
+
+  HInstruction* GetBaseAddress() const { return InputAt(0); }
+  HInstruction* GetOffset() const { return InputAt(1); }
+
+  DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 0b3a84d..fa47976 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -20,12 +20,14 @@
 namespace art {
 
 // Compute the address of the method for X86 Constant area support.
-class HX86ComputeBaseMethodAddress : public HExpression<0> {
+class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
  public:
   // Treat the value as an int32_t, but it is really a 32 bit native pointer.
   HX86ComputeBaseMethodAddress()
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+
   DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress);
 
  private:
@@ -33,7 +35,7 @@
 };
 
 // Load a constant value from the constant table.
-class HX86LoadFromConstantTable : public HExpression<2> {
+class HX86LoadFromConstantTable FINAL : public HExpression<2> {
  public:
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
                             HConstant* constant)
@@ -57,7 +59,7 @@
 };
 
 // Version of HNeg with access to the constant table for FP types.
-class HX86FPNeg : public HExpression<2> {
+class HX86FPNeg FINAL : public HExpression<2> {
  public:
   HX86FPNeg(Primitive::Type result_type,
             HInstruction* input,
@@ -76,7 +78,7 @@
 };
 
 // X86 version of HPackedSwitch that holds a pointer to the base method address.
-class HX86PackedSwitch : public HTemplateInstruction<2> {
+class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
  public:
   HX86PackedSwitch(int32_t start_value,
                    int32_t num_entries,
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2f59d4c..0819fb0 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -37,7 +37,10 @@
 
   virtual ~HOptimization() {}
 
-  // Return the name of the pass.
+  // Return the name of the pass. Pass names for a single HOptimization should be of the form
+  // <optimization_name> or <optimization_name>$<pass_name>, where <optimization_name> is a
+  // common prefix shared by related passes. Example: 'instruction_simplifier',
+  // 'instruction_simplifier$after_bce', 'instruction_simplifier$before_codegen'.
   const char* GetPassName() const { return pass_name_; }
 
   // Perform the analysis itself.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 400686d..8c0231e 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -32,7 +32,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class OptimizingCFITest : public CFITest {
  public:
@@ -157,13 +157,26 @@
     TestImpl(isa, #isa, expected_asm, expected_cfi);          \
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
 TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
 TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
 TEST_ISA(kMips64)
+#endif
 
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_F(OptimizingCFITest, kThumb2Adjust) {
   std::vector<uint8_t> expected_asm(
       expected_asm_kThumb2_adjust,
@@ -184,7 +197,9 @@
   Finish();
   Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
 }
+#endif
 
+#ifdef ART_ENABLE_CODEGEN_mips
 TEST_F(OptimizingCFITest, kMipsAdjust) {
   // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
   static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -212,7 +227,9 @@
   Finish();
   Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
 }
+#endif
 
+#ifdef ART_ENABLE_CODEGEN_mips64
 TEST_F(OptimizingCFITest, kMips64Adjust) {
   // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
   static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -240,7 +257,8 @@
   Finish();
   Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
 }
+#endif
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index fc66823..05eb063 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,21 +32,21 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
-    0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
+    0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str x0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000004: str x20, [sp, #40]
 // 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: .cfi_offset: r21 at cfa-16
-// 0x00000008: str lr, [sp, #56]
+// 0x00000008: stp x21, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
 // 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -55,10 +55,10 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x20, x21, [sp, #40]
+// 0x00000014: ldr x20, [sp, #40]
 // 0x00000018: .cfi_restore: r20
-// 0x00000018: .cfi_restore: r21
-// 0x00000018: ldr lr, [sp, #56]
+// 0x00000018: ldp x21, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
 // 0x00000020: .cfi_def_cfa_offset: 0
@@ -140,53 +140,43 @@
 // 0x0000002d: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips[] = {
-    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
-    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
-    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
-    0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7,
-    0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7,
-    0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F,
-    0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
+    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
+    0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips[] = {
-    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1,
-    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48,
+    0x0B, 0x0E, 0x40,
 };
-// 0x00000000: addiu r29, r29, -28
-// 0x00000004: .cfi_def_cfa_offset: 28
-// 0x00000004: sw r31, +24(r29)
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +20(r29)
+// 0x00000008: sw r17, +56(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +16(r29)
+// 0x0000000c: sw r16, +52(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: swc1 f22, +8(r29)
-// 0x00000014: swc1 f23, +12(r29)
-// 0x00000018: swc1 f20, +0(r29)
-// 0x0000001c: swc1 f21, +4(r29)
-// 0x00000020: addiu r29, r29, -36
-// 0x00000024: .cfi_def_cfa_offset: 64
-// 0x00000024: sw r4, +0(r29)
-// 0x00000028: .cfi_remember_state
-// 0x00000028: addiu r29, r29, 36
-// 0x0000002c: .cfi_def_cfa_offset: 28
-// 0x0000002c: lwc1 f20, +0(r29)
-// 0x00000030: lwc1 f21, +4(r29)
-// 0x00000034: lwc1 f22, +8(r29)
-// 0x00000038: lwc1 f23, +12(r29)
-// 0x0000003c: lw r16, +16(r29)
-// 0x00000040: .cfi_restore: r16
-// 0x00000040: lw r17, +20(r29)
-// 0x00000044: .cfi_restore: r17
-// 0x00000044: lw r31, +24(r29)
-// 0x00000048: .cfi_restore: r31
-// 0x00000048: addiu r29, r29, 28
-// 0x0000004c: .cfi_def_cfa_offset: 0
-// 0x0000004c: jr r31
-// 0x00000050: nop
-// 0x00000054: .cfi_restore_state
-// 0x00000054: .cfi_def_cfa_offset: 64
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: lw r31, +60(r29)
+// 0x00000020: .cfi_restore: r31
+// 0x00000020: lw r17, +56(r29)
+// 0x00000024: .cfi_restore: r17
+// 0x00000024: lw r16, +52(r29)
+// 0x00000028: .cfi_restore: r16
+// 0x00000028: ldc1 f22, +40(r29)
+// 0x0000002c: ldc1 f20, +32(r29)
+// 0x00000030: addiu r29, r29, 64
+// 0x00000034: .cfi_def_cfa_offset: 0
+// 0x00000034: jr r31
+// 0x00000038: nop
+// 0x0000003c: .cfi_restore_state
+// 0x0000003c: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
@@ -349,75 +339,65 @@
 // 0x00000098: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
-    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
-    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
-    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
     0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
     0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
     0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
     0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
 };
 static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
-    0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7,
-    0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F,
-    0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27,
+    0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
+    0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27,
     0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips_adjust[] = {
-    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00,
-    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
-    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+    0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+    0x40,
 };
-// 0x00000000: addiu r29, r29, -28
-// 0x00000004: .cfi_def_cfa_offset: 28
-// 0x00000004: sw r31, +24(r29)
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +20(r29)
+// 0x00000008: sw r17, +56(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +16(r29)
+// 0x0000000c: sw r16, +52(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: swc1 f22, +8(r29)
-// 0x00000014: swc1 f23, +12(r29)
-// 0x00000018: swc1 f20, +0(r29)
-// 0x0000001c: swc1 f21, +4(r29)
-// 0x00000020: addiu r29, r29, -36
-// 0x00000024: .cfi_def_cfa_offset: 64
-// 0x00000024: sw r4, +0(r29)
-// 0x00000028: bne r0, r4, 0x0000004c ; +36
-// 0x0000002c: addiu r29, r29, -4
-// 0x00000030: .cfi_def_cfa_offset: 68
-// 0x00000030: sw r31, +0(r29)
-// 0x00000034: bltzal r0, 0x00000038 ; +4
-// 0x00000038: lui r1, 0x20000
-// 0x0000003c: ori r1, r1, 24
-// 0x00000040: addu r1, r1, r31
-// 0x00000044: lw r31, +0(r29)
-// 0x00000048: jr r1
-// 0x0000004c: addiu r29, r29, 4
-// 0x00000050: .cfi_def_cfa_offset: 64
-// 0x00000050: nop
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: bne r0, r4, 0x00000040 ; +36
+// 0x00000020: addiu r29, r29, -4
+// 0x00000024: .cfi_def_cfa_offset: 68
+// 0x00000024: sw r31, +0(r29)
+// 0x00000028: bltzal r0, 0x0000002c ; +4
+// 0x0000002c: lui r1, 0x20000
+// 0x00000030: ori r1, r1, 24
+// 0x00000034: addu r1, r1, r31
+// 0x00000038: lw r31, +0(r29)
+// 0x0000003c: jr r1
+// 0x00000040: addiu r29, r29, 4
+// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x00000044: nop
 //             ...
-// 0x00020050: nop
-// 0x00020054: .cfi_remember_state
-// 0x00020054: addiu r29, r29, 36
-// 0x00020058: .cfi_def_cfa_offset: 28
-// 0x00020058: lwc1 f20, +0(r29)
-// 0x0002005c: lwc1 f21, +4(r29)
-// 0x00020060: lwc1 f22, +8(r29)
-// 0x00020064: lwc1 f23, +12(r29)
-// 0x00020068: lw r16, +16(r29)
-// 0x0002006c: .cfi_restore: r16
-// 0x0002006c: lw r17, +20(r29)
-// 0x00020070: .cfi_restore: r17
-// 0x00020070: lw r31, +24(r29)
-// 0x00020074: .cfi_restore: r31
-// 0x00020074: addiu r29, r29, 28
-// 0x00020078: .cfi_def_cfa_offset: 0
-// 0x00020078: jr r31
-// 0x0002007c: nop
-// 0x00020080: .cfi_restore_state
-// 0x00020080: .cfi_def_cfa_offset: 64
+// 0x00020044: nop
+// 0x00020048: .cfi_remember_state
+// 0x00020048: lw r31, +60(r29)
+// 0x0002004c: .cfi_restore: r31
+// 0x0002004c: lw r17, +56(r29)
+// 0x00020050: .cfi_restore: r17
+// 0x00020050: lw r16, +52(r29)
+// 0x00020054: .cfi_restore: r16
+// 0x00020054: ldc1 f22, +40(r29)
+// 0x00020058: ldc1 f20, +32(r29)
+// 0x0002005c: addiu r29, r29, 64
+// 0x00020060: .cfi_def_cfa_offset: 0
+// 0x00020060: jr r31
+// 0x00020064: nop
+// 0x00020068: .cfi_restore_state
+// 0x00020068: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c9a4bfe..cc9cbda 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -28,10 +28,19 @@
 #include "instruction_simplifier_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "dex_cache_array_fixups_mips.h"
+#include "pc_relative_fixups_mips.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "pc_relative_fixups_x86.h"
 #endif
 
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#include "x86_memory_gen.h"
+#endif
+
 #include "art_method-inl.h"
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
@@ -72,7 +81,7 @@
 #include "oat_quick_method_header.h"
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "select_generator.h"
 #include "sharpening.h"
 #include "side_effects_analysis.h"
@@ -86,6 +95,8 @@
 
 static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
 
+static constexpr const char* kPassNameSeparator = "$";
+
 /**
  * Used by the code generator, to allocate the code in a vector.
  */
@@ -169,6 +180,7 @@
 
  private:
   void StartPass(const char* pass_name) {
+    VLOG(compiler) << "Starting pass: " << pass_name;
     // Dump graph first, then start timer.
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
@@ -257,7 +269,7 @@
 class OptimizingCompiler FINAL : public Compiler {
  public:
   explicit OptimizingCompiler(CompilerDriver* driver);
-  ~OptimizingCompiler();
+  ~OptimizingCompiler() OVERRIDE;
 
   bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
 
@@ -272,8 +284,13 @@
 
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const OVERRIDE {
-    return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
+                             const DexFile& dex_file,
+                             JniOptimizationFlags optimization_flags) const OVERRIDE {
+    return ArtQuickJniCompileMethod(GetCompilerDriver(),
+                                    access_flags,
+                                    method_idx,
+                                    dex_file,
+                                    optimization_flags);
   }
 
   uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
@@ -297,6 +314,18 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
+  void RunOptimizations(HGraph* graph,
+                        CodeGenerator* codegen,
+                        CompilerDriver* driver,
+                        const DexCompilationUnit& dex_compilation_unit,
+                        PassObserver* pass_observer,
+                        StackHandleScopeCollection* handles) const;
+
+  void RunOptimizations(HOptimization* optimizations[],
+                        size_t length,
+                        PassObserver* pass_observer) const;
+
+ private:
   // Create a 'CompiledMethod' for an optimized graph.
   CompiledMethod* Emit(ArenaAllocator* arena,
                        CodeVectorAllocator* code_allocator,
@@ -324,6 +353,18 @@
                             ArtMethod* method,
                             bool osr) const;
 
+  void MaybeRunInliner(HGraph* graph,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver,
+                       const DexCompilationUnit& dex_compilation_unit,
+                       PassObserver* pass_observer,
+                       StackHandleScopeCollection* handles) const;
+
+  void RunArchOptimizations(InstructionSet instruction_set,
+                            HGraph* graph,
+                            CodeGenerator* codegen,
+                            PassObserver* pass_observer) const;
+
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
   std::unique_ptr<std::ostream> visualizer_output_;
@@ -387,22 +428,148 @@
       || instruction_set == kX86_64;
 }
 
-static void RunOptimizations(HOptimization* optimizations[],
-                             size_t length,
-                             PassObserver* pass_observer) {
+// Strip pass name suffix to get optimization name.
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
+  size_t pos = pass_name.find(kPassNameSeparator);
+  return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+}
+
+static HOptimization* BuildOptimization(
+    const std::string& pass_name,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles,
+    SideEffectsAnalysis* most_recent_side_effects,
+    HInductionVarAnalysis* most_recent_induction) {
+  std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+  if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
+    return new (arena) BoundsCheckElimination(graph,
+                                              *most_recent_side_effects,
+                                              most_recent_induction);
+  } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str());
+  } else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
+    return new (arena) HConstantFolding(graph, pass_name.c_str());
+  } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
+    return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str());
+  } else if (opt_name == HInliner::kInlinerPassName) {
+    size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+    return new (arena) HInliner(graph,                   // outer_graph
+                                graph,                   // outermost_graph
+                                codegen,
+                                dex_compilation_unit,    // outer_compilation_unit
+                                dex_compilation_unit,    // outermost_compilation_unit
+                                driver,
+                                handles,
+                                stats,
+                                number_of_dex_registers,
+                                /* depth */ 0);
+  } else if (opt_name == HSharpening::kSharpeningPassName) {
+    return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+  } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
+    return new (arena) HSelectGenerator(graph, stats);
+  } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+    return new (arena) HInductionVarAnalysis(graph);
+  } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
+    return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+  } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
+    return new (arena) IntrinsicsRecognizer(graph, driver, stats);
+  } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LICM(graph, *most_recent_side_effects, stats);
+  } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
+    CHECK(most_recent_side_effects != nullptr);
+    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+  } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+    return new (arena) SideEffectsAnalysis(graph);
+#ifdef ART_ENABLE_CODEGEN_arm
+  } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
+    return new (arena) arm::DexCacheArrayFixups(graph, stats);
+  } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+    return new (arena) arm::InstructionSimplifierArm(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+  } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+    return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+  } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
+    return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+  } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
+    return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+  } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
+    return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+  } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
+    return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+#endif
+  }
+  return nullptr;
+}
+
+static ArenaVector<HOptimization*> BuildOptimizations(
+    const std::vector<std::string>& pass_names,
+    ArenaAllocator* arena,
+    HGraph* graph,
+    OptimizingCompilerStats* stats,
+    CodeGenerator* codegen,
+    CompilerDriver* driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    StackHandleScopeCollection* handles) {
+  // A few HOptimization constructors require SideEffectsAnalysis or HInductionVarAnalysis
+  // instances. This method assumes that each of them expects the nearest instance preceding it
+  // in the pass name list.
+  SideEffectsAnalysis* most_recent_side_effects = nullptr;
+  HInductionVarAnalysis* most_recent_induction = nullptr;
+  ArenaVector<HOptimization*> ret(arena->Adapter());
+  for (const std::string& pass_name : pass_names) {
+    HOptimization* opt = BuildOptimization(
+        pass_name,
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles,
+        most_recent_side_effects,
+        most_recent_induction);
+    CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
+    ret.push_back(opt);
+
+    std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+    if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+      most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
+    } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+      most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+    }
+  }
+  return ret;
+}
+
+void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
+                                          size_t length,
+                                          PassObserver* pass_observer) const {
   for (size_t i = 0; i < length; ++i) {
     PassScope scope(optimizations[i]->GetPassName(), pass_observer);
     optimizations[i]->Run();
   }
 }
 
-static void MaybeRunInliner(HGraph* graph,
-                            CodeGenerator* codegen,
-                            CompilerDriver* driver,
-                            OptimizingCompilerStats* stats,
-                            const DexCompilationUnit& dex_compilation_unit,
-                            PassObserver* pass_observer,
-                            StackHandleScopeCollection* handles) {
+void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* driver,
+                                         const DexCompilationUnit& dex_compilation_unit,
+                                         PassObserver* pass_observer,
+                                         StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
   bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
       && (compiler_options.GetInlineMaxCodeUnits() > 0);
@@ -411,11 +578,11 @@
   }
   size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
   HInliner* inliner = new (graph->GetArena()) HInliner(
-      graph,
-      graph,
+      graph,                   // outer_graph
+      graph,                   // outermost_graph
       codegen,
-      dex_compilation_unit,
-      dex_compilation_unit,
+      dex_compilation_unit,    // outer_compilation_unit
+      dex_compilation_unit,    // outermost_compilation_unit
       driver,
       handles,
       stats,
@@ -426,11 +593,12 @@
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
 }
 
-static void RunArchOptimizations(InstructionSet instruction_set,
-                                 HGraph* graph,
-                                 CodeGenerator* codegen,
-                                 OptimizingCompilerStats* stats,
-                                 PassObserver* pass_observer) {
+void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
+                                              HGraph* graph,
+                                              CodeGenerator* codegen,
+                                              PassObserver* pass_observer) const {
+  UNUSED(codegen);  // To avoid compilation error when compiling for svelte
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -439,8 +607,12 @@
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
+      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* arm_optimizations[] = {
         simplifier,
+        side_effects,
+        gvn,
         fixups
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -452,7 +624,7 @@
       arm64::InstructionSimplifierArm64* simplifier =
           new (arena) arm64::InstructionSimplifierArm64(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* arm64_optimizations[] = {
         simplifier,
         side_effects,
@@ -462,17 +634,45 @@
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips: {
+      mips::PcRelativeFixups* pc_relative_fixups =
+          new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+      mips::DexCacheArrayFixups* dex_cache_array_fixups =
+          new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+      HOptimization* mips_optimizations[] = {
+          pc_relative_fixups,
+          dex_cache_array_fixups
+      };
+      RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
       x86::PcRelativeFixups* pc_relative_fixups =
           new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_optimizations[] = {
-          pc_relative_fixups
+          pc_relative_fixups,
+          memory_gen
       };
       RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64: {
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+      HOptimization* x86_64_optimizations[] = {
+          memory_gen
+      };
+      RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer);
+      break;
+    }
+#endif
     default:
       break;
   }
@@ -481,7 +681,8 @@
 NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
+                              PassObserver* pass_observer,
+                              RegisterAllocator::Strategy strategy) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
@@ -494,27 +695,42 @@
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+    RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
   }
 }
 
-static void RunOptimizations(HGraph* graph,
-                             CodeGenerator* codegen,
-                             CompilerDriver* driver,
-                             OptimizingCompilerStats* stats,
-                             const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
+void OptimizingCompiler::RunOptimizations(HGraph* graph,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* driver,
+                                          const DexCompilationUnit& dex_compilation_unit,
+                                          PassObserver* pass_observer,
+                                          StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
+  if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
+    ArenaVector<HOptimization*> optimizations = BuildOptimizations(
+        *driver->GetCompilerOptions().GetPassesToRun(),
+        arena,
+        graph,
+        stats,
+        codegen,
+        driver,
+        dex_compilation_unit,
+        handles);
+    RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+    return;
+  }
+
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+      graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
-      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+      graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
-  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
-  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
+  HConstantFolding* fold2 = new (arena) HConstantFolding(
+      graph, "constant_folding$after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects, stats);
@@ -523,9 +739,9 @@
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_bce");
+      graph, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_before_codegen");
+      graph, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
 
   HOptimization* optimizations1[] = {
@@ -537,7 +753,7 @@
   };
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+  MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
     // SelectGenerator depends on the InstructionSimplifier removing
@@ -560,8 +776,7 @@
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
-  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, stats, pass_observer);
-  AllocateRegisters(graph, codegen, pass_observer);
+  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
 }
 
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
@@ -772,11 +987,14 @@
     RunOptimizations(graph,
                      codegen.get(),
                      compiler_driver,
-                     compilation_stats_.get(),
                      dex_compilation_unit,
                      &pass_observer,
                      &handles);
 
+    RegisterAllocator::Strategy regalloc_strategy =
+      compiler_options.GetRegisterAllocationStrategy();
+    AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+
     codegen->Compile(code_allocator);
     pass_observer.DumpDisassembly();
   }
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 9cc6ea4..c8d1ce0 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -65,6 +65,7 @@
   kInlinedInvokeVirtualOrInterface,
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
+  kSimplifyIf,
   kLastStat
 };
 
@@ -143,6 +144,7 @@
       case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
       case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
       case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
+      case kSimplifyIf: name = "SimplifyIf"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
new file mode 100644
index 0000000..c6acc45
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pc_relative_fixups_mips.h"
+#include "code_generator_mips.h"
+#include "intrinsics_mips.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class PCRelativeHandlerVisitor : public HGraphVisitor {
+ public:
+  PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
+        base_(nullptr) {}
+
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+      // Computing the base for PC-relative literals will clobber RA with
+      // the NAL instruction on R2. Take a note of this before generating
+      // the method entry.
+      codegen_->ClobberRA();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void InitializePCRelativeBasePointer() {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
+    DCHECK(base_ != nullptr);
+  }
+
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    switch (load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_class->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+    switch (load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+        // Add a base register for PC-relative literals on R2.
+        InitializePCRelativeBasePointer();
+        load_string->AddSpecialInput(base_);
+        break;
+      default:
+        break;
+    }
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // If this is an invoke-static/-direct with PC-relative dex cache array
+    // addressing, we need the PC-relative address base.
+    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    if (invoke_static_or_direct != nullptr) {
+      HInvokeStaticOrDirect::MethodLoadKind method_load_kind =
+          invoke_static_or_direct->GetMethodLoadKind();
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          invoke_static_or_direct->GetCodePtrLocation();
+
+      bool has_extra_input =
+          (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+          (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup);
+
+      // We can't add a pointer to the constant area if we already have a current
+      // method pointer. This may arise when sharpening doesn't remove the current
+      // method pointer from the invoke.
+      if (invoke_static_or_direct->HasCurrentMethodInput()) {
+        DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+        CHECK(!has_extra_input);
+        return;
+      }
+
+      if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+        InitializePCRelativeBasePointer();
+        // Add the extra parameter base_.
+        invoke_static_or_direct->AddSpecialInput(base_);
+      }
+    }
+  }
+
+  bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
+    if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+      // This invoke may have intrinsic code generation defined. However, we must
+      // now also determine if this code generation is truly there and call-free
+      // (not unimplemented, no bail on instruction features, or call on slow path).
+      // This is done by actually calling the locations builder on the instruction
+      // and clearing out the locations once the result is known. We assume this
+      // call only has creating locations as side effects!
+      IntrinsicLocationsBuilderMIPS builder(codegen_);
+      bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+      invoke->SetLocations(nullptr);
+      return success;
+    }
+    return false;
+  }
+
+  CodeGeneratorMIPS* codegen_;
+
+  // The generated HMipsComputeBaseMethodAddress in the entry block needed as an
+  // input to the HMipsLoadFromConstantTable instructions.
+  HMipsComputeBaseMethodAddress* base_;
+};
+
+void PcRelativeFixups::Run() {
+  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+    // Do nothing for R6 because it has PC-relative addressing.
+    return;
+  }
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  PCRelativeHandlerVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
new file mode 100644
index 0000000..5a7397b
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class PcRelativeFixups : public HOptimization {
+ public:
+  PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "pc_relative_fixups_mips", stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index dafbd3d..ad0921d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -80,6 +80,15 @@
     HandleInvoke(invoke);
   }
 
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+      InitializePCRelativeBasePointer();
+      load_class->AddSpecialInput(base_);
+    }
+  }
+
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
@@ -202,8 +211,9 @@
     }
 
     // Ensure that we can load FP arguments from the constant area.
-    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
-      HConstant* input = invoke->InputAt(i)->AsConstant();
+    HInputsRef inputs = invoke->GetInputs();
+    for (size_t i = 0; i < inputs.size(); i++) {
+      HConstant* input = inputs[i]->AsConstant();
       if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
         ReplaceInput(invoke, input, i, true);
       }
@@ -217,6 +227,7 @@
       case Intrinsics::kMathMaxFloatFloat:
       case Intrinsics::kMathMinDoubleDouble:
       case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathRoundFloat:
         if (!base_added) {
           DCHECK(invoke_static_or_direct != nullptr);
           DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 03de2fc..72fa71e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -29,9 +29,11 @@
 class PcRelativeFixups : public HOptimization {
  public:
   PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "pc_relative_fixups_x86", stats),
+      : HOptimization(graph, kPcRelativeFixupsX86PassName, stats),
         codegen_(codegen) {}
 
+  static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
+
   void Run() OVERRIDE;
 
  private:
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 38e42f7..8fb5396 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,22 @@
 
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
+  if (check->IsStringCharAt()) {
+    // Add a fake environment for String.charAt() inline info as we want
+    // the exception to appear as being thrown from there.
+    const DexFile& dex_file = check->GetEnvironment()->GetDexFile();
+    DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(),
+                 "char java.lang.String.charAt(int)");
+    ArenaAllocator* arena = GetGraph()->GetArena();
+    HEnvironment* environment = new (arena) HEnvironment(arena,
+                                                         /* number_of_vregs */ 0u,
+                                                         dex_file,
+                                                         check->GetStringCharAtMethodIndex(),
+                                                         /* dex_pc */ DexFile::kDexNoIndex,
+                                                         kVirtual,
+                                                         check);
+    check->InsertRawEnvironment(environment);
+  }
 }
 
 void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
@@ -173,8 +189,7 @@
 
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HLoadClass* last_input = invoke->InputAt(last_input_index)->AsLoadClass();
+    HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index ee32518..5891350 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -39,16 +39,17 @@
   }
 
   void PrintPostInstruction(HInstruction* instruction) {
-    if (instruction->InputCount() != 0) {
+    HConstInputsRef inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
       PrintString("(");
       bool first = true;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
+      for (const HInstruction* input : inputs) {
         if (first) {
           first = false;
         } else {
           PrintString(", ");
         }
-        PrintInt(it.Current()->GetId());
+        PrintInt(input->GetId());
       }
       PrintString(")");
     }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 479ffc2..e96ab19 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,6 +16,7 @@
 
 #include "reference_type_propagation.h"
 
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
@@ -46,13 +47,6 @@
   return *cache;
 }
 
-// Returns true if klass is admissible to the propagation: non-null and resolved.
-// For an array type, we also check if the component type is admissible.
-static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
-  return klass != nullptr && klass->IsResolved() &&
-      (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
-}
-
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
   return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
 }
@@ -782,7 +776,7 @@
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
   mirror::DexCache* dex_cache =
       FindDexCacheWithHint(soa.Self(), instr->GetDexFile(), hint_dex_cache_);
-  size_t pointer_size = cl->GetImagePointerSize();
+  PointerSize pointer_size = cl->GetImagePointerSize();
   ArtMethod* method = dex_cache->GetResolvedMethod(instr->GetDexMethodIndex(), pointer_size);
   mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false, pointer_size);
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
@@ -823,13 +817,13 @@
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
   DCHECK(instr->IsLive());
 
-  size_t input_count = instr->InputCount();
+  HInputsRef inputs = instr->GetInputs();
   size_t first_input_index_not_null = 0;
-  while (first_input_index_not_null < input_count &&
-      instr->InputAt(first_input_index_not_null)->IsNullConstant()) {
+  while (first_input_index_not_null < inputs.size() &&
+         inputs[first_input_index_not_null]->IsNullConstant()) {
     first_input_index_not_null++;
   }
-  if (first_input_index_not_null == input_count) {
+  if (first_input_index_not_null == inputs.size()) {
     // All inputs are NullConstants, set the type to object.
     // This may happen in the presence of inlining.
     instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
@@ -844,11 +838,11 @@
     return;
   }
 
-  for (size_t i = first_input_index_not_null + 1; i < input_count; i++) {
-    if (instr->InputAt(i)->IsNullConstant()) {
+  for (size_t i = first_input_index_not_null + 1; i < inputs.size(); i++) {
+    if (inputs[i]->IsNullConstant()) {
       continue;
     }
-    new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
+    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
@@ -879,8 +873,8 @@
   if (instr->IsPhi()) {
     HPhi* phi = instr->AsPhi();
     bool new_can_be_null = false;
-    for (size_t i = 0; i < phi->InputCount(); i++) {
-      if (phi->InputAt(i)->CanBeNull()) {
+    for (HInstruction* input : phi->GetInputs()) {
+      if (input->CanBeNull()) {
         new_can_be_null = true;
         break;
       }
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 2106be6..edd83bf 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -42,6 +42,14 @@
 
   void Run() OVERRIDE;
 
+  // Returns true if klass is admissible to the propagation: non-null and resolved.
+  // For an array type, we also check if the component type is admissible.
+  static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return klass != nullptr &&
+           klass->IsResolved() &&
+           (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
+  }
+
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
new file mode 100644
index 0000000..3450286
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocation_resolver.h"
+
+#include "code_generator.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator,
+                                                       CodeGenerator* codegen,
+                                                       const SsaLivenessAnalysis& liveness)
+      : allocator_(allocator),
+        codegen_(codegen),
+        liveness_(liveness) {}
+
+void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+                                         size_t max_safepoint_live_fp_regs,
+                                         size_t reserved_out_slots,
+                                         size_t int_spill_slots,
+                                         size_t long_spill_slots,
+                                         size_t float_spill_slots,
+                                         size_t double_spill_slots,
+                                         size_t catch_phi_spill_slots,
+                                         const ArenaVector<LiveInterval*>& temp_intervals) {
+  size_t spill_slots = int_spill_slots
+                     + long_spill_slots
+                     + float_spill_slots
+                     + double_spill_slots
+                     + catch_phi_spill_slots;
+
+  // Computes frame size and spill mask.
+  codegen_->InitializeCodeGeneration(spill_slots,
+                                     max_safepoint_live_core_regs,
+                                     max_safepoint_live_fp_regs,
+                                     reserved_out_slots,  // Includes slot(s) for the art method.
+                                     codegen_->GetGraph()->GetLinearOrder());
+
+  // Resolve outputs, including stack locations.
+  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    LiveInterval* current = instruction->GetLiveInterval();
+    LocationSummary* locations = instruction->GetLocations();
+    Location location = locations->Out();
+    if (instruction->IsParameterValue()) {
+      // Now that we know the frame size, adjust the parameter's location.
+      if (location.IsStackSlot()) {
+        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (location.IsDoubleStackSlot()) {
+        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (current->HasSpillSlot()) {
+        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+      }
+    } else if (instruction->IsCurrentMethod()) {
+      // The current method is always at offset 0.
+      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+      DCHECK(current->HasSpillSlot());
+      size_t slot = current->GetSpillSlot()
+                    + spill_slots
+                    + reserved_out_slots
+                    - catch_phi_spill_slots;
+      current->SetSpillSlot(slot * kVRegSize);
+    } else if (current->HasSpillSlot()) {
+      // Adjust the stack slot, now that we know the number of them for each type.
+      // The way this implementation lays out the stack is the following:
+      // [parameter slots       ]
+      // [catch phi spill slots ]
+      // [double spill slots    ]
+      // [long spill slots      ]
+      // [float spill slots     ]
+      // [int/ref values        ]
+      // [maximum out values    ] (number of arguments for calls)
+      // [art method            ].
+      size_t slot = current->GetSpillSlot();
+      switch (current->GetType()) {
+        case Primitive::kPrimDouble:
+          slot += long_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimLong:
+          slot += float_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimFloat:
+          slot += int_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          slot += reserved_out_slots;
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+      }
+      current->SetSpillSlot(slot * kVRegSize);
+    }
+
+    Location source = current->ToLocation();
+
+    if (location.IsUnallocated()) {
+      if (location.GetPolicy() == Location::kSameAsFirstInput) {
+        if (locations->InAt(0).IsUnallocated()) {
+          locations->SetInAt(0, source);
+        } else {
+          DCHECK(locations->InAt(0).Equals(source));
+        }
+      }
+      locations->UpdateOut(source);
+    } else {
+      DCHECK(source.Equals(location));
+    }
+  }
+
+  // Connect siblings and resolve inputs.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    ConnectSiblings(instruction->GetLiveInterval(),
+                    max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+  }
+
+  // Resolve non-linear control flow across branches. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+        // Instructions live at the top of catch blocks or at irreducible loop headers
+        // were forced to spill.
+      if (kIsDebugBuild) {
+        BitVector* live = liveness_.GetLiveInSet(*block);
+        for (uint32_t idx : live->Indexes()) {
+          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+          LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
+          // `GetSiblingAt` returns the sibling that contains a position, but there could be
+          // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
+          // position.
+          if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
+            DCHECK(!sibling->HasRegister());
+          }
+        }
+      }
+    } else {
+      BitVector* live = liveness_.GetLiveInSet(*block);
+      for (uint32_t idx : live->Indexes()) {
+        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+        for (HBasicBlock* predecessor : block->GetPredecessors()) {
+          ConnectSplitSiblings(interval, predecessor, block);
+        }
+      }
+    }
+  }
+
+  // Resolve phi inputs. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* current = it.Current();
+    if (current->IsCatchBlock()) {
+      // Catch phi values are set at runtime by the exception delivery mechanism.
+    } else {
+      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* phi = inst_it.Current();
+        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+          HBasicBlock* predecessor = current->GetPredecessors()[i];
+          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+          HInstruction* input = phi->InputAt(i);
+          Location source = input->GetLiveInterval()->GetLocationAt(
+              predecessor->GetLifetimeEnd() - 1);
+          Location destination = phi->GetLiveInterval()->ToLocation();
+          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+        }
+      }
+    }
+  }
+
+  // Resolve temp locations.
+  for (LiveInterval* temp : temp_intervals) {
+    if (temp->IsHighInterval()) {
+      // High intervals can be skipped; they are already handled by the low interval.
+      continue;
+    }
+    HInstruction* at = liveness_.GetTempUser(temp);
+    size_t temp_index = liveness_.GetTempIndex(temp);
+    LocationSummary* locations = at->GetLocations();
+    switch (temp->GetType()) {
+      case Primitive::kPrimInt:
+        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
+        break;
+
+      case Primitive::kPrimDouble:
+        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+          Location location = Location::FpuRegisterPairLocation(
+              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+          locations->SetTempAt(temp_index, location);
+        } else {
+          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
+        }
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type for temporary location "
+                   << temp->GetType();
+    }
+  }
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
+                                                 size_t max_safepoint_live_regs) {
+  LiveInterval* current = interval;
+  if (current->HasSpillSlot()
+      && current->HasRegister()
+      // Currently, we unconditionally spill the current method in the code generators.
+      && !interval->GetDefinedBy()->IsCurrentMethod()) {
+    // We spill eagerly, so move must be at definition.
+    InsertMoveAfter(interval->GetDefinedBy(),
+                    interval->ToLocation(),
+                    interval->NeedsTwoSpillSlots()
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+  }
+  UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
+
+  // Walk over all siblings, updating locations of use positions, and
+  // connecting them when they are adjacent.
+  do {
+    Location source = current->ToLocation();
+
+    // Walk over all uses covered by this interval, and update the location
+    // information.
+
+    LiveRange* range = current->GetFirstRange();
+    while (range != nullptr) {
+      while (use != nullptr && use->GetPosition() < range->GetStart()) {
+        DCHECK(use->IsSynthesized());
+        use = use->GetNext();
+      }
+      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
+        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid in case the input is unused. Currently
+          // this only happens for intrinsics.
+          if (expected_location.IsValid()) {
+            if (expected_location.IsUnallocated()) {
+              locations->SetInAt(use->GetInputIndex(), source);
+            } else if (!expected_location.IsConstant()) {
+              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+            }
+          } else {
+            DCHECK(use->GetUser()->IsInvoke());
+            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+          }
+        }
+        use = use->GetNext();
+      }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition())
+               || (env_use->GetPosition() == range->GetEnd()));
+        HEnvironment* environment = env_use->GetEnvironment();
+        environment->SetLocationAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
+      range = range->GetNext();
+    }
+
+    // If the next interval starts just after this one, and has a register,
+    // insert a move.
+    LiveInterval* next_sibling = current->GetNextSibling();
+    if (next_sibling != nullptr
+        && next_sibling->HasRegister()
+        && current->GetEnd() == next_sibling->GetStart()) {
+      Location destination = next_sibling->ToLocation();
+      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+    }
+
+    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+         safepoint_position != nullptr;
+         safepoint_position = safepoint_position->GetNext()) {
+      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+
+      LocationSummary* locations = safepoint_position->GetLocations();
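+      // References that also have a spill slot are recorded in the safepoint's
+      // stack mask below, so the GC can locate them on the stack.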
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+        DCHECK(interval->GetDefinedBy()->IsActualObject())
+            << interval->GetDefinedBy()->DebugName()
+            << "@" << safepoint_position->GetInstruction()->DebugName();
+        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+      }
+
+      switch (source.GetKind()) {
+        case Location::kRegister: {
+          locations->AddLiveRegister(source);
+          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                      max_safepoint_live_regs);
+          }
+          if (current->GetType() == Primitive::kPrimNot) {
+            DCHECK(interval->GetDefinedBy()->IsActualObject())
+                << interval->GetDefinedBy()->DebugName()
+                << "@" << safepoint_position->GetInstruction()->DebugName();
+            locations->SetRegisterBit(source.reg());
+          }
+          break;
+        }
+        case Location::kFpuRegister: {
+          locations->AddLiveRegister(source);
+          break;
+        }
+
+        case Location::kRegisterPair:
+        case Location::kFpuRegisterPair: {
+          locations->AddLiveRegister(source.ToLow());
+          locations->AddLiveRegister(source.ToHigh());
+          break;
+        }
+        case Location::kStackSlot:  // Fall-through
+        case Location::kDoubleStackSlot:  // Fall-through
+        case Location::kConstant: {
+          // Nothing to do.
+          break;
+        }
+        default: {
+          LOG(FATAL) << "Unexpected location for object";
+        }
+      }
+    }
+    current = next_sibling;
+  } while (current != nullptr);
+
+  if (kIsDebugBuild) {
+    // Following uses can only be synthesized uses.
+    while (use != nullptr) {
+      DCHECK(use->IsSynthesized());
+      use = use->GetNext();
+    }
+  }
+}
+
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
+void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
+                                                      HBasicBlock* from,
+                                                      HBasicBlock* to) const {
+  if (interval->GetNextSibling() == nullptr) {
+    // Nothing to connect. The whole range was allocated to the same location.
+    return;
+  }
+
+  // Find the intervals that cover `from` and `to`.
+  size_t destination_position = to->GetLifetimeStart();
+  size_t source_position = from->GetLifetimeEnd() - 1;
+  LiveInterval* destination = interval->GetSiblingAt(destination_position);
+  LiveInterval* source = interval->GetSiblingAt(source_position);
+
+  if (destination == source) {
+    // Interval was not split.
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (destination == nullptr || !destination->CoversSlow(destination_position))) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
+    return;
+  }
+
+  if (!destination->HasRegister()) {
+    // Values are eagerly spilled. Spill slot already contains appropriate value.
+    return;
+  }
+
+  Location location_source;
+  // `GetSiblingAt` returns the interval whose start and end cover `position`,
+  // but does not check whether the interval is inactive at that position.
+  // The only situation where the interval is inactive at that position is in the
+  // presence of irreducible loops for constants and ArtMethod.
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (source == nullptr || !source->CoversSlow(source_position))) {
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    if (defined_by->IsConstant()) {
+      location_source = defined_by->GetLocations()->Out();
+    } else {
+      DCHECK(defined_by->IsCurrentMethod());
+      location_source = parent->NeedsTwoSpillSlots()
+          ? Location::DoubleStackSlot(parent->GetSpillSlot())
+          : Location::StackSlot(parent->GetSpillSlot());
+    }
+  } else {
+    DCHECK(source != nullptr);
+    DCHECK(source->CoversSlow(source_position));
+    DCHECK(destination->CoversSlow(destination_position));
+    location_source = source->ToLocation();
+  }
+
+  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
+  // we need to put the moves at the entry of `to`.
+  if (from->GetNormalSuccessors().size() == 1) {
+    InsertParallelMoveAtExitOf(from,
+                               defined_by,
+                               location_source,
+                               destination->ToLocation());
+  } else {
+    DCHECK_EQ(to->GetPredecessors().size(), 1u);
+    InsertParallelMoveAtEntryOf(to,
+                                defined_by,
+                                location_source,
+                                destination->ToLocation());
+  }
+}
+
+static bool IsValidDestination(Location destination) {
+  return destination.IsRegister()
+      || destination.IsRegisterPair()
+      || destination.IsFpuRegister()
+      || destination.IsFpuRegisterPair()
+      || destination.IsStackSlot()
+      || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocationResolver::AddMove(HParallelMove* move,
+                                         Location source,
+                                         Location destination,
+                                         HInstruction* instruction,
+                                         Primitive::Type type) const {
+  if (type == Primitive::kPrimLong
+      && codegen_->ShouldSplitLongMoves()
+      // The parallel move resolver knows how to deal with long constants.
+      && !source.IsConstant()) {
+    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
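+    // Only the low half carries `instruction`: a parallel move should not
+    // record two moves for the same instruction.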
+    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
+  } else {
+    move->AddMove(source, destination, type, instruction);
+  }
+}
+
+void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
+                                                 HInstruction* user,
+                                                 Location source,
+                                                 Location destination) const {
+  if (source.Equals(destination)) return;
+
+  DCHECK(!user->IsPhi());
+
+  HInstruction* previous = user->GetPrevious();
+  HParallelMove* move = nullptr;
+  if (previous == nullptr
+      || !previous->IsParallelMove()
+      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(user->GetLifetimePosition());
+    user->GetBlock()->InsertInstructionBefore(move, user);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
+  AddMove(move, source, destination, nullptr, input->GetType());
+}
+
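+// Lifetime positions come in pairs: an even position marks the start of an
+// instruction, the following odd position marks its end.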
+static bool IsInstructionStart(size_t position) {
+  return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+  return (position & 1) == 1;
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
+                                                      HInstruction* instruction,
+                                                      Location source,
+                                                      Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+  HParallelMove* move;
+  if (at == nullptr) {
+    if (IsInstructionStart(position)) {
+      // Block boundary: don't do anything, the connection of split siblings will handle it.
+      return;
+    } else {
+      // Move must happen before the first instruction of the block.
+      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+      // Note that parallel moves may have already been inserted, so we explicitly
+      // ask for the first instruction of the block: `GetInstructionFromPosition` does
+      // not return `HParallelMove` instructions.
+      at = at->GetBlock()->GetFirstInstruction();
+
+      if (at->GetLifetimePosition() < position) {
+        // We may insert moves for split siblings and phi spills at the beginning of the block.
+        // Since this is a different lifetime position, we need to go to the next instruction.
+        DCHECK(at->IsParallelMove());
+        at = at->GetNext();
+      }
+
+      if (at->GetLifetimePosition() != position) {
+        DCHECK_GT(at->GetLifetimePosition(), position);
+        move = new (allocator_) HParallelMove(allocator_);
+        move->SetLifetimePosition(position);
+        at->GetBlock()->InsertInstructionBefore(move, at);
+      } else {
+        DCHECK(at->IsParallelMove());
+        move = at->AsParallelMove();
+      }
+    }
+  } else if (IsInstructionEnd(position)) {
+    // Move must happen after the instruction.
+    DCHECK(!at->IsControlFlow());
+    move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() > position) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+    }
+  } else {
+    // Move must happen before the instruction.
+    HInstruction* previous = at->GetPrevious();
+    if (previous == nullptr
+        || !previous->IsParallelMove()
+        || previous->GetLifetimePosition() != position) {
+      // If the previous is a parallel move, then its position must be lower
+      // than the given `position`: it was added just after the non-parallel
+      // move instruction that precedes `instruction`.
+      DCHECK(previous == nullptr
+             || !previous->IsParallelMove()
+             || previous->GetLifetimePosition() < position);
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at);
+    } else {
+      move = previous->AsParallelMove();
+    }
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), position);
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                                            HInstruction* instruction,
+                                                            Location source,
+                                                            Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
+  HInstruction* last = block->GetLastInstruction();
+  // We insert moves at exit for phi predecessors and connecting blocks.
+  // A block ending with an if or a packed switch cannot branch to a block
+  // with phis because we do not allow critical edges. Nor can it be used to connect
+  // a split interval between two blocks: the move has to happen in the successor.
+  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
+  HInstruction* previous = last->GetPrevious();
+  HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from output moves.
+  size_t position = last->GetLifetimePosition();
+  if (previous == nullptr || !previous->IsParallelMove()
+      || previous->AsParallelMove()->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, last);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                                             HInstruction* instruction,
+                                                             Location source,
+                                                             Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* first = block->GetFirstInstruction();
+  HParallelMove* move = first->AsParallelMove();
+  size_t position = block->GetLifetimeStart();
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, first);
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
+                                                 Location source,
+                                                 Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  if (instruction->IsPhi()) {
+    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
+    return;
+  }
+
+  size_t position = instruction->GetLifetimePosition() + 1;
+  HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, moves for connecting siblings in the
+  // same block, and moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
new file mode 100644
index 0000000..6ceb9bc
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+
+#include "base/arena_containers.h"
+#include "base/value_object.h"
+#include "primitive.h"
+
+namespace art {
+
+class ArenaAllocator;
+class CodeGenerator;
+class HBasicBlock;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Reconciles the locations assigned to live intervals with the location
+ * summary of each instruction, and inserts moves to resolve split intervals,
+ * nonlinear control flow, and phi inputs.
+ */
+class RegisterAllocationResolver : ValueObject {
+ public:
+  RegisterAllocationResolver(ArenaAllocator* allocator,
+                             CodeGenerator* codegen,
+                             const SsaLivenessAnalysis& liveness);
+
+  void Resolve(size_t max_safepoint_live_core_regs,
+               size_t max_safepoint_live_fp_regs,
+               size_t reserved_out_slots,  // Includes slot(s) for the art method.
+               size_t int_spill_slots,
+               size_t long_spill_slots,
+               size_t float_spill_slots,
+               size_t double_spill_slots,
+               size_t catch_phi_spill_slots,
+               const ArenaVector<LiveInterval*>& temp_intervals);
+
+ private:
+  // Connect adjacent siblings within blocks, and resolve inputs along the way.
+  // Uses max_safepoint_live_regs to check that we did not underestimate the
+  // number of live registers at safepoints.
+  void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+
+  // Connect siblings between block entries and exits.
+  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+  // Helper methods for inserting parallel moves in the graph.
+  void InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                  HInstruction* instruction,
+                                  Location source,
+                                  Location destination) const;
+  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                   HInstruction* instruction,
+                                   Location source,
+                                   Location destination) const;
+  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+  void AddInputMoveFor(HInstruction* input,
+                       HInstruction* user,
+                       Location source,
+                       Location destination) const;
+  void InsertParallelMoveAt(size_t position,
+                            HInstruction* instruction,
+                            Location source,
+                            Location destination) const;
+  void AddMove(HParallelMove* move,
+               Location source,
+               Location destination,
+               HInstruction* instruction,
+               Primitive::Type type) const;
+
+  ArenaAllocator* const allocator_;
+  CodeGenerator* const codegen_;
+  const SsaLivenessAnalysis& liveness_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 4405b80..5b768d5 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,65 +21,33 @@
 
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
+#include "register_allocator_graph_color.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 
+
 namespace art {
 
-static constexpr size_t kMaxLifetimePosition = -1;
-static constexpr size_t kDefaultNumberOfSpillSlots = 4;
-
-// For simplicity, we implement register pairs as (reg, reg + 1).
-// Note that this is a requirement for double registers on ARM, since we
-// allocate SRegister.
-static int GetHighForLowRegister(int reg) { return reg + 1; }
-static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
-static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
-  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
-}
-
 RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
                                      CodeGenerator* codegen,
                                      const SsaLivenessAnalysis& liveness)
-      : allocator_(allocator),
-        codegen_(codegen),
-        liveness_(liveness),
-        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_(nullptr),
-        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        catch_phi_spill_slots_(0),
-        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        processing_core_registers_(false),
-        number_of_registers_(-1),
-        registers_array_(nullptr),
-        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
-        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
-        reserved_out_slots_(0),
-        maximum_number_of_live_core_registers_(0),
-        maximum_number_of_live_fp_registers_(0) {
-  temp_intervals_.reserve(4);
-  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+    : allocator_(allocator),
+      codegen_(codegen),
+      liveness_(liveness) {}
 
-  codegen->SetupBlockedRegisters();
-  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
-  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
-  // Always reserve for the current method and the graph's max out registers.
-  // TODO: compute it instead.
-  // ArtMethod* takes 2 vregs for 64 bits.
-  reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
-      codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+                                             CodeGenerator* codegen,
+                                             const SsaLivenessAnalysis& analysis,
+                                             Strategy strategy) {
+  switch (strategy) {
+    case kRegisterAllocatorLinearScan:
+      return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+    case kRegisterAllocatorGraphColor:
+      return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);
+    default:
+      LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+      UNREACHABLE();
+  }
 }
 
 bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -93,328 +61,6 @@
       || instruction_set == kX86_64;
 }
 
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
-  if (interval == nullptr) return false;
-  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
-      && (interval->GetType() != Primitive::kPrimFloat);
-  return processing_core_registers == is_core_register;
-}
-
-void RegisterAllocator::AllocateRegisters() {
-  AllocateRegistersInternal();
-  Resolve();
-
-  if (kIsDebugBuild) {
-    processing_core_registers_ = true;
-    ValidateInternal(true);
-    processing_core_registers_ = false;
-    ValidateInternal(true);
-    // Check that the linear order is still correct with regards to lifetime positions.
-    // Since only parallel moves have been inserted during the register allocation,
-    // these checks are mostly for making sure these moves have been added correctly.
-    size_t current_liveness = 0;
-    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
-      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
-        current_liveness = instruction->GetLifetimePosition();
-      }
-      for (HInstructionIterator inst_it(block->GetInstructions());
-           !inst_it.Done();
-           inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
-        current_liveness = instruction->GetLifetimePosition();
-      }
-    }
-  }
-}
-
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
-  int reg = location.reg();
-  DCHECK(location.IsRegister() || location.IsFpuRegister());
-  LiveInterval* interval = location.IsRegister()
-      ? physical_core_register_intervals_[reg]
-      : physical_fp_register_intervals_[reg];
-  Primitive::Type type = location.IsRegister()
-      ? Primitive::kPrimInt
-      : Primitive::kPrimFloat;
-  if (interval == nullptr) {
-    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
-    if (location.IsRegister()) {
-      physical_core_register_intervals_[reg] = interval;
-    } else {
-      physical_fp_register_intervals_[reg] = interval;
-    }
-  }
-  DCHECK(interval->GetRegister() == reg);
-  interval->AddRange(start, end);
-}
-
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
-  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
-      BlockRegister(Location::RegisterLocation(i), start, end);
-    }
-  }
-  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
-      BlockRegister(Location::FpuRegisterLocation(i), start, end);
-    }
-  }
-}
-
-void RegisterAllocator::AllocateRegistersInternal() {
-  // Iterate post-order, to ensure the list is sorted, and the last added interval
-  // is the one with the lowest start position.
-  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
-         back_it.Advance()) {
-      ProcessInstruction(back_it.Current());
-    }
-    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-      ProcessInstruction(inst_it.Current());
-    }
-
-    if (block->IsCatchBlock() ||
-        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
-      // By blocking all registers at the top of each catch block or irreducible loop, we force
-      // intervals belonging to the live-in set of the catch/header block to be spilled.
-      // TODO(ngeoffray): Phis in this block could be allocated in register.
-      size_t position = block->GetLifetimeStart();
-      BlockRegisters(position, position + 1);
-    }
-  }
-
-  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = true;
-  unhandled_ = &unhandled_core_intervals_;
-  for (LiveInterval* fixed : physical_core_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-
-  inactive_.clear();
-  active_.clear();
-  handled_.clear();
-
-  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = false;
-  unhandled_ = &unhandled_fp_intervals_;
-  for (LiveInterval* fixed : physical_fp_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-}
-
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  size_t position = instruction->GetLifetimePosition();
-
-  if (locations == nullptr) return;
-
-  // Create synthesized intervals for temporaries.
-  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
-    Location temp = locations->GetTemp(i);
-    if (temp.IsRegister() || temp.IsFpuRegister()) {
-      BlockRegister(temp, position, position + 1);
-      // Ensure that an explicit temporary register is marked as being allocated.
-      codegen_->AddAllocatedRegister(temp);
-    } else {
-      DCHECK(temp.IsUnallocated());
-      switch (temp.GetPolicy()) {
-        case Location::kRequiresRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          unhandled_core_intervals_.push_back(interval);
-          break;
-        }
-
-        case Location::kRequiresFpuRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-            interval->AddHighInterval(/* is_temp */ true);
-            LiveInterval* high = interval->GetHighInterval();
-            temp_intervals_.push_back(high);
-            unhandled_fp_intervals_.push_back(high);
-          }
-          unhandled_fp_intervals_.push_back(interval);
-          break;
-        }
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << temp.GetPolicy();
-      }
-    }
-  }
-
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-
-  if (locations->NeedsSafepoint()) {
-    if (codegen_->IsLeafMethod()) {
-      // TODO: We do this here because we do not want the suspend check to artificially
-      // create live registers. We should find another place, but this is currently the
-      // simplest.
-      DCHECK(instruction->IsSuspendCheckEntry());
-      instruction->GetBlock()->RemoveInstruction(instruction);
-      return;
-    }
-    safepoints_.push_back(instruction);
-    if (locations->OnlyCallsOnSlowPath()) {
-      // We add a synthesized range at this position to record the live registers
-      // at this position. Ideally, we could just update the safepoints when locations
-      // are updated, but we currently need to know the full stack size before updating
-      // locations (because of parameters and the fact that we don't have a frame pointer).
-      // And knowing the full stack size requires to know the maximum number of live
-      // registers at calls in slow paths.
-      // By adding the following interval in the algorithm, we can compute this
-      // maximum before updating locations.
-      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      interval->AddRange(position, position + 1);
-      AddSorted(&unhandled_core_intervals_, interval);
-      AddSorted(&unhandled_fp_intervals_, interval);
-    }
-  }
-
-  if (locations->WillCall()) {
-    BlockRegisters(position, position + 1, /* caller_save_only */ true);
-  }
-
-  for (size_t i = 0; i < instruction->InputCount(); ++i) {
-    Location input = locations->InAt(i);
-    if (input.IsRegister() || input.IsFpuRegister()) {
-      BlockRegister(input, position, position + 1);
-    } else if (input.IsPair()) {
-      BlockRegister(input.ToLow(), position, position + 1);
-      BlockRegister(input.ToHigh(), position, position + 1);
-    }
-  }
-
-  LiveInterval* current = instruction->GetLiveInterval();
-  if (current == nullptr) return;
-
-  ArenaVector<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
-  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
-
-  if (codegen_->NeedsTwoRegisters(current->GetType())) {
-    current->AddHighInterval();
-  }
-
-  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
-    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
-    size_t safepoint_position = safepoint->GetLifetimePosition();
-
-    // Test that safepoints are ordered in the optimal way.
-    DCHECK(safepoint_index == safepoints_.size() ||
-           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
-    if (safepoint_position == current->GetStart()) {
-      // The safepoint is for this instruction, so the location of the instruction
-      // does not need to be saved.
-      DCHECK_EQ(safepoint_index, safepoints_.size());
-      DCHECK_EQ(safepoint, instruction);
-      continue;
-    } else if (current->IsDeadAt(safepoint_position)) {
-      break;
-    } else if (!current->Covers(safepoint_position)) {
-      // Hole in the interval.
-      continue;
-    }
-    current->AddSafepoint(safepoint);
-  }
-  current->ResetSearchCache();
-
-  // Some instructions define their output in fixed register/stack slot. We need
-  // to ensure we know these locations before doing register allocation. For a
-  // given register, we create an interval that covers these locations. The register
-  // will be unavailable at these locations when trying to allocate one for an
-  // interval.
-  //
-  // The backwards walking ensures the ranges are ordered on increasing start positions.
-  Location output = locations->Out();
-  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
-    Location first = locations->InAt(0);
-    if (first.IsRegister() || first.IsFpuRegister()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.reg());
-    } else if (first.IsPair()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.low());
-      LiveInterval* high = current->GetHighInterval();
-      high->SetRegister(first.high());
-      high->SetFrom(position + 1);
-    }
-  } else if (output.IsRegister() || output.IsFpuRegister()) {
-    // Shift the interval's start by one to account for the blocked register.
-    current->SetFrom(position + 1);
-    current->SetRegister(output.reg());
-    BlockRegister(output, position, position + 1);
-  } else if (output.IsPair()) {
-    current->SetFrom(position + 1);
-    current->SetRegister(output.low());
-    LiveInterval* high = current->GetHighInterval();
-    high->SetRegister(output.high());
-    high->SetFrom(position + 1);
-    BlockRegister(output.ToLow(), position, position + 1);
-    BlockRegister(output.ToHigh(), position, position + 1);
-  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
-    current->SetSpillSlot(output.GetStackIndex());
-  } else {
-    DCHECK(output.IsUnallocated() || output.IsConstant());
-  }
-
-  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
-  }
-
-  // If needed, add interval to the list of unhandled intervals.
-  if (current->HasSpillSlot() || instruction->IsConstant()) {
-    // Split just before first register use.
-    size_t first_register_use = current->FirstRegisterUse();
-    if (first_register_use != kNoLifetime) {
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      // Don't add directly to `unhandled`, it needs to be sorted and the start
-      // of this new interval might be after intervals already in the list.
-      AddSorted(&unhandled, split);
-    } else {
-      // Nothing to do, we won't allocate a register for this value.
-    }
-  } else {
-    // Don't add directly to `unhandled`, temp or safepoint intervals
-    // for this instruction may have been added, and those can be
-    // processed first.
-    AddSorted(&unhandled, current);
-  }
-}
-
 class AllRangesIterator : public ValueObject {
  public:
   explicit AllRangesIterator(LiveInterval* interval)
@@ -442,36 +88,6 @@
   DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
 };
 
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
-  // To simplify unit testing, we eagerly create the array of intervals, and
-  // call the helper method.
-  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
-  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
-      intervals.push_back(instruction->GetLiveInterval());
-    }
-  }
-
-  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
-      ? &physical_core_register_intervals_
-      : &physical_fp_register_intervals_;
-  for (LiveInterval* fixed : *physical_register_intervals) {
-    if (fixed != nullptr) {
-      intervals.push_back(fixed);
-    }
-  }
-
-  for (LiveInterval* temp : temp_intervals_) {
-    if (ShouldProcess(processing_core_registers_, temp)) {
-      intervals.push_back(temp);
-    }
-  }
-
-  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
-                           allocator_, processing_core_registers_, log_fatal_on_failure);
-}
-
 bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                           size_t number_of_spill_slots,
                                           size_t number_of_out_slots,
@@ -550,6 +166,19 @@
               } else {
                 codegen.DumpFloatingPointRegister(message, current->GetRegister());
               }
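+              // Append every interval that also occupies this register at this
+              // position, to make the validation failure easier to diagnose.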
+              for (LiveInterval* interval : intervals) {
+                if (interval->HasRegister()
+                    && interval->GetRegister() == current->GetRegister()
+                    && interval->CoversSlow(j)) {
+                  message << std::endl;
+                  if (interval->GetDefinedBy() != nullptr) {
+                    message << interval->GetDefinedBy()->GetKind() << " ";
+                  } else {
+                    message << "physical ";
+                  }
+                  interval->Dump(message);
+                }
+              }
               LOG(FATAL) << message.str();
             } else {
               return false;
@@ -564,637 +193,30 @@
   return true;
 }
 
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
-  interval->Dump(stream);
-  stream << ": ";
-  if (interval->HasRegister()) {
-    if (interval->IsFloatingPoint()) {
-      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
-    } else {
-      codegen_->DumpCoreRegister(stream, interval->GetRegister());
-    }
-  } else {
-    stream << "spilled";
-  }
-  stream << std::endl;
-}
-
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
-  stream << "inactive: " << std::endl;
-  for (LiveInterval* inactive_interval : inactive_) {
-    DumpInterval(stream, inactive_interval);
-  }
-  stream << "active: " << std::endl;
-  for (LiveInterval* active_interval : active_) {
-    DumpInterval(stream, active_interval);
-  }
-  stream << "unhandled: " << std::endl;
-  auto unhandled = (unhandled_ != nullptr) ?
-      unhandled_ : &unhandled_core_intervals_;
-  for (LiveInterval* unhandled_interval : *unhandled) {
-    DumpInterval(stream, unhandled_interval);
-  }
-  stream << "handled: " << std::endl;
-  for (LiveInterval* handled_interval : handled_) {
-    DumpInterval(stream, handled_interval);
-  }
-}
-
-// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
-  while (!unhandled_->empty()) {
-    // (1) Remove interval with the lowest start position from unhandled.
-    LiveInterval* current = unhandled_->back();
-    unhandled_->pop_back();
-
-    // Make sure the interval is an expected state.
-    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
-    // Make sure we are going in the right order.
-    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
-    // Make sure a low interval is always with a high.
-    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
-    // Make sure a high interval is always with a low.
-    DCHECK(current->IsLowInterval() ||
-           unhandled_->empty() ||
-           !unhandled_->back()->IsHighInterval());
-
-    size_t position = current->GetStart();
-
-    // Remember the inactive_ size here since the ones moved to inactive_ from
-    // active_ below shouldn't need to be re-checked.
-    size_t inactive_intervals_to_handle = inactive_.size();
-
-    // (2) Remove currently active intervals that are dead at this position.
-    //     Move active intervals that have a lifetime hole at this position
-    //     to inactive.
-    auto active_kept_end = std::remove_if(
-        active_.begin(),
-        active_.end(),
-        [this, position](LiveInterval* interval) {
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (!interval->Covers(position)) {
-            inactive_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    active_.erase(active_kept_end, active_.end());
-
-    // (3) Remove currently inactive intervals that are dead at this position.
-    //     Move inactive intervals that cover this position to active.
-    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
-    auto inactive_kept_end = std::remove_if(
-        inactive_.begin(),
-        inactive_to_handle_end,
-        [this, position](LiveInterval* interval) {
-          DCHECK(interval->GetStart() < position || interval->IsFixed());
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (interval->Covers(position)) {
-            active_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
-
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      if (processing_core_registers_) {
-        maximum_number_of_live_core_registers_ =
-          std::max(maximum_number_of_live_core_registers_, active_.size());
-      } else {
-        maximum_number_of_live_fp_registers_ =
-          std::max(maximum_number_of_live_fp_registers_, active_.size());
-      }
-      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
-      continue;
-    }
-
-    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
-      DCHECK(!current->HasRegister());
-      // Allocating the low part was unsucessful. The splitted interval for the high part
-      // will be handled next (it is in the `unhandled_` list).
-      continue;
-    }
-
-    // (4) Try to find an available register.
-    bool success = TryAllocateFreeReg(current);
-
-    // (5) If no register could be found, we need to spill.
-    if (!success) {
-      success = AllocateBlockedReg(current);
-    }
-
-    // (6) If the interval had a register allocated, add it to the list of active
-    //     intervals.
-    if (success) {
-      codegen_->AddAllocatedRegister(processing_core_registers_
-          ? Location::RegisterLocation(current->GetRegister())
-          : Location::FpuRegisterLocation(current->GetRegister()));
-      active_.push_back(current);
-      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
-        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
-      }
-    }
-  }
-}
-
-static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
-  DCHECK(!interval->IsHighInterval());
-  // Note that the same instruction may occur multiple times in the input list,
-  // so `free_until` may have changed already.
-  // Since `position` is not the current scan position, we need to use CoversSlow.
-  if (interval->IsDeadAt(position)) {
-    // Set the register to be free. Note that inactive intervals might later
-    // update this.
-    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+  DCHECK_GE(position, interval->GetStart());
+  DCHECK(!interval->IsDeadAt(position));
+  if (position == interval->GetStart()) {
+    // Spill slot will be allocated when handling `interval` again.
+    interval->ClearRegister();
     if (interval->HasHighInterval()) {
-      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+      interval->GetHighInterval()->ClearRegister();
+    } else if (interval->HasLowInterval()) {
+      interval->GetLowInterval()->ClearRegister();
     }
-  } else if (!interval->CoversSlow(position)) {
-    // The interval becomes inactive at `defined_by`. We make its register
-    // available only until the next use strictly after `defined_by`.
-    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    return interval;
+  } else {
+    LiveInterval* new_interval = interval->SplitAt(position);
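+    // Split any paired half at the same position as well, and relink the new
+    // siblings so the low/high halves stay consistent.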
     if (interval->HasHighInterval()) {
-      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+      new_interval->SetHighInterval(high);
+      high->SetLowInterval(new_interval);
+    } else if (interval->HasLowInterval()) {
+      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+      new_interval->SetLowInterval(low);
+      low->SetHighInterval(new_interval);
     }
-  }
-}
-
-// Find a free register. If multiple are found, pick the register that
-// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
-  size_t* free_until = registers_array_;
-
-  // First set all registers to be free.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    free_until[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, set its register to not free.
-  for (LiveInterval* interval : active_) {
-    DCHECK(interval->HasRegister());
-    free_until[interval->GetRegister()] = 0;
-  }
-
-  // An interval that starts an instruction (that is, it is not split), may
-  // re-use the registers used by the inputs of that instruciton, based on the
-  // location summary.
-  HInstruction* defined_by = current->GetDefinedBy();
-  if (defined_by != nullptr && !current->IsSplit()) {
-    LocationSummary* locations = defined_by->GetLocations();
-    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
-      for (size_t i = 0, e = defined_by->InputCount(); i < e; ++i) {
-        // Take the last interval of the input. It is the location of that interval
-        // that will be used at `defined_by`.
-        LiveInterval* interval = defined_by->InputAt(i)->GetLiveInterval()->GetLastSibling();
-        // Note that interval may have not been processed yet.
-        // TODO: Handle non-split intervals last in the work list.
-        if (locations->InAt(i).IsValid()
-            && interval->HasRegister()
-            && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply to
-          // the linear scan algorithm. So we use `defined_by`'s end lifetime
-          // position to check whether the input is dead or is inactive after
-          // `defined_by`.
-          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
-          size_t position = defined_by->GetLifetimePosition() + 1;
-          FreeIfNotCoverAt(interval, position, free_until);
-        }
-      }
-    }
-  }
-
-  // For each inactive interval, set its register to be free until
-  // the next intersection with `current`.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-
-    DCHECK(inactive->HasRegister());
-    if (free_until[inactive->GetRegister()] == 0) {
-      // Already used by some active interval. No need to intersect.
-      continue;
-    }
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      free_until[inactive->GetRegister()] =
-          std::min(free_until[inactive->GetRegister()], next_intersection);
-    }
-  }
-
-  int reg = kNoRegister;
-  if (current->HasRegister()) {
-    // Some instructions have a fixed register output.
-    reg = current->GetRegister();
-    if (free_until[reg] == 0) {
-      DCHECK(current->IsHighInterval());
-      // AllocateBlockedReg will spill the holder of the register.
-      return false;
-    }
-  } else {
-    DCHECK(!current->IsHighInterval());
-    int hint = current->FindFirstRegisterHint(free_until, liveness_);
-    if ((hint != kNoRegister)
-        // For simplicity, if the hint we are getting for a pair cannot be used,
-        // we are just going to allocate a new pair.
-        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
-      DCHECK(!IsBlocked(hint));
-      reg = hint;
-    } else if (current->IsLowInterval()) {
-      reg = FindAvailableRegisterPair(free_until, current->GetStart());
-    } else {
-      reg = FindAvailableRegister(free_until, current);
-    }
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  // If we could not find a register, we need to spill.
-  if (free_until[reg] == 0) {
-    return false;
-  }
-
-  if (current->IsLowInterval()) {
-    // If the high register of this interval is not available, we need to spill.
-    int high_reg = current->GetHighInterval()->GetRegister();
-    if (high_reg == kNoRegister) {
-      high_reg = GetHighForLowRegister(reg);
-    }
-    if (free_until[high_reg] == 0) {
-      return false;
-    }
-  }
-
-  current->SetRegister(reg);
-  if (!current->IsDeadAt(free_until[reg])) {
-    // If the register is only available for a subset of live ranges
-    // covered by `current`, split `current` before the position where
-    // the register is not available anymore.
-    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
-    DCHECK(split != nullptr);
-    AddSorted(unhandled_, split);
-  }
-  return true;
-}
-
-bool RegisterAllocator::IsBlocked(int reg) const {
-  return processing_core_registers_
-      ? blocked_core_registers_[reg]
-      : blocked_fp_registers_[reg];
-}
-
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
-  int reg = kNoRegister;
-  // Pick the register pair that is used the last.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) continue;
-    if (!IsLowRegister(i)) continue;
-    int high_register = GetHighForLowRegister(i);
-    if (IsBlocked(high_register)) continue;
-    int existing_high_register = GetHighForLowRegister(reg);
-    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
-                        && next_use[high_register] >= next_use[existing_high_register])) {
-      reg = i;
-      if (next_use[i] == kMaxLifetimePosition
-          && next_use[high_register] == kMaxLifetimePosition) {
-        break;
-      }
-    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
-      // If one of the currently chosen registers is known to be unavailable,
-      // just unconditionally try a new one.
-      reg = i;
-    }
-  }
-  return reg;
-}
-
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
-  return processing_core_registers_
-      ? !codegen_->IsCoreCalleeSaveRegister(reg)
-      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
-  // We special-case intervals that do not span a safepoint: for those we try to find a
-  // caller-save register if one is available. We iterate from 0 to the number of registers,
-  // and keep iterating past a suitable callee-save register in the hope that a caller-save
-  // register shows up later.
-  bool prefers_caller_save = !current->HasWillCallSafepoint();
-  int reg = kNoRegister;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) {
-      // Register cannot be used. Continue.
-      continue;
-    }
-
-    // Best case: we found a register fully available.
-    if (next_use[i] == kMaxLifetimePosition) {
-      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
-        // We can get shorter encodings on some platforms by using
-        // small register numbers. So only update the candidate if the previous
-        // one was not available for the whole method.
-        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
-          reg = i;
-        }
-        // Continue the iteration in the hope of finding a caller save register.
-        continue;
-      } else {
-        reg = i;
-        // We know the register is good enough. Return it.
-        break;
-      }
-    }
-
-    // If we had no register before, take this one as a reference.
-    if (reg == kNoRegister) {
-      reg = i;
-      continue;
-    }
-
-    // Pick the register that is used last.
-    if (next_use[i] > next_use[reg]) {
-      reg = i;
-      continue;
-    }
-  }
-  return reg;
-}
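
A hedged, standalone illustration of the caller-save preference implemented above; PickRegister and the sample arrays are made up for the example and deliberately ignore the next-use comparison, keeping only the "prefer a caller-save register when the interval never spans a safepoint" rule.

    #include <cstdio>

    // Sketch only: among registers that are free for the whole interval, return a
    // caller-save one when the interval does not cross a call; otherwise settle
    // for the first fully free register found.
    static int PickRegister(const bool* is_caller_save,
                            const bool* fully_free,
                            int num_regs,
                            bool prefers_caller_save) {
      int fallback = -1;
      for (int i = 0; i < num_regs; ++i) {
        if (!fully_free[i]) continue;
        if (!prefers_caller_save || is_caller_save[i]) return i;  // good enough, stop
        if (fallback == -1) fallback = i;  // remember a callee-save candidate
      }
      return fallback;
    }

    int main() {
      bool caller_save[4] = {false, false, true, true};
      bool fully_free[4]  = {true, false, true, false};
      std::printf("picked r%d\n", PickRegister(caller_save, fully_free, 4, true));  // r2
      return 0;
    }
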
-
-// Remove interval and its other half if any. Return iterator to the following element.
-static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
-    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
-  DCHECK(intervals->begin() <= pos && pos < intervals->end());
-  LiveInterval* interval = *pos;
-  if (interval->IsLowInterval()) {
-    DCHECK(pos + 1 < intervals->end());
-    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
-    return intervals->erase(pos, pos + 2);
-  } else if (interval->IsHighInterval()) {
-    DCHECK(intervals->begin() < pos);
-    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
-    return intervals->erase(pos - 1, pos + 1);
-  } else {
-    return intervals->erase(pos);
-  }
-}
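
RemoveIntervalAndPotentialOtherHalf relies on the standard guarantee that std::vector::erase of a [first, last) range returns an iterator to the element following the erased range. A tiny standalone demonstration of that pattern (the values are arbitrary):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> v = {10, 20, 21, 30};  // pretend 20/21 are a low/high pair
      auto it = v.begin() + 1;
      it = v.erase(it, it + 2);               // erases both halves of the pair
      std::printf("iterator now points at %d\n", *it);  // prints 30
      return 0;
    }
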
-
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                                 size_t first_register_use,
-                                                                 size_t* next_use) {
-  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-    LiveInterval* active = *it;
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) continue;
-    if (active->IsHighInterval()) continue;
-    if (first_register_use > next_use[active->GetRegister()]) continue;
-
-    // Split the first interval found that is either:
-    // 1) A non-pair interval.
-    // 2) A pair interval whose high is not low + 1.
-    // 3) A pair interval whose low is not even.
-    if (!active->IsLowInterval() ||
-        IsLowOfUnalignedPairInterval(active) ||
-        !IsLowRegister(active->GetRegister())) {
-      LiveInterval* split = Split(active, position);
-      if (split != active) {
-        handled_.push_back(active);
-      }
-      RemoveIntervalAndPotentialOtherHalf(&active_, it);
-      AddSorted(unhandled_, split);
-      return true;
-    }
-  }
-  return false;
-}
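
The three cases listed in the comment above boil down to a simple alignment rule for register pairs: the low half must live in an even-numbered register and the high half in the register right above it. A minimal standalone check of that rule (IsAlignedPair is a hypothetical helper, not ART code):

    #include <cstdio>

    static bool IsAlignedPair(int low_reg, int high_reg) {
      return (low_reg % 2 == 0) && (high_reg == low_reg + 1);
    }

    int main() {
      std::printf("R4/R5 aligned: %d\n", IsAlignedPair(4, 5));  // 1
      std::printf("R5/R6 aligned: %d\n", IsAlignedPair(5, 6));  // 0
      return 0;
    }
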
-
-// Find the register that is used last, and spill the interval
-// that holds it. If the first use of `current` is after the next use of that
-// register, we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
-  size_t first_register_use = current->FirstRegisterUse();
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    // The low interval has allocated the register for the high interval. In
-    // case the low interval had to split both intervals, we may end up in a
-    // situation where the high interval does not have a register use anymore.
-    // We must still proceed in order to split currently active and inactive
-    // uses of the high interval's register, and put the high interval in the
-    // active set.
-    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
-  } else if (first_register_use == kNoLifetime) {
-    AllocateSpillSlotFor(current);
-    return false;
-  }
-
-  // First set all registers as not being used.
-  size_t* next_use = registers_array_;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    next_use[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* active : active_) {
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) {
-      next_use[active->GetRegister()] = current->GetStart();
-    } else {
-      size_t use = active->FirstRegisterUseAfter(current->GetStart());
-      if (use != kNoLifetime) {
-        next_use[active->GetRegister()] = use;
-      }
-    }
-  }
-
-  // For each inactive interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-    DCHECK(inactive->HasRegister());
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      if (inactive->IsFixed()) {
-        next_use[inactive->GetRegister()] =
-            std::min(next_intersection, next_use[inactive->GetRegister()]);
-      } else {
-        size_t use = inactive->FirstUseAfter(current->GetStart());
-        if (use != kNoLifetime) {
-          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
-        }
-      }
-    }
-  }
-
-  int reg = kNoRegister;
-  bool should_spill = false;
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    reg = current->GetRegister();
-    // When allocating the low part, we made sure the high register was available.
-    DCHECK_LT(first_register_use, next_use[reg]);
-  } else if (current->IsLowInterval()) {
-    reg = FindAvailableRegisterPair(next_use, first_register_use);
-    // We should spill if both registers are not available.
-    should_spill = (first_register_use >= next_use[reg])
-      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
-  } else {
-    DCHECK(!current->IsHighInterval());
-    reg = FindAvailableRegister(next_use, current);
-    should_spill = (first_register_use >= next_use[reg]);
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  if (should_spill) {
-    DCHECK(!current->IsHighInterval());
-    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
-    if (is_allocation_at_use_site) {
-      if (!current->IsLowInterval()) {
-        DumpInterval(std::cerr, current);
-        DumpAllIntervals(std::cerr);
-        // This situation has the potential to loop infinitely, so we make it a non-debug CHECK.
-        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
-        CHECK(false) << "There are not enough registers available for "
-          << current->GetParent()->GetDefinedBy()->DebugName() << " "
-          << current->GetParent()->GetDefinedBy()->GetId()
-          << " at " << first_register_use - 1 << " "
-          << (at == nullptr ? "" : at->DebugName());
-      }
-
-      // If we're allocating a register for `current` because the instruction at
-      // that position requires it, but we think we should spill, then there are
-      // non-pair intervals or unaligned pair intervals blocking the allocation.
-      // We split the first interval found, and put ourselves first in the
-      // `unhandled_` list.
-      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
-                                                              first_register_use,
-                                                              next_use);
-      DCHECK(success);
-      LiveInterval* existing = unhandled_->back();
-      DCHECK(existing->IsHighInterval());
-      DCHECK_EQ(existing->GetLowInterval(), current);
-      unhandled_->push_back(current);
-    } else {
-      // If the first use of that instruction is after the last use of the found
-      // register, we split this interval just before its first register use.
-      AllocateSpillSlotFor(current);
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      DCHECK(current != split);
-      AddSorted(unhandled_, split);
-    }
-    return false;
-  } else {
-    // Use this register and spill the active and inactive intervals that
-    // have that register.
-    current->SetRegister(reg);
-
-    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-      LiveInterval* active = *it;
-      if (active->GetRegister() == reg) {
-        DCHECK(!active->IsFixed());
-        LiveInterval* split = Split(active, current->GetStart());
-        if (split != active) {
-          handled_.push_back(active);
-        }
-        RemoveIntervalAndPotentialOtherHalf(&active_, it);
-        AddSorted(unhandled_, split);
-        break;
-      }
-    }
-
-    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
-    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
-      LiveInterval* inactive = *it;
-      bool erased = false;
-      if (inactive->GetRegister() == reg) {
-        if (!current->IsSplit() && !inactive->IsFixed()) {
-          // Neither current nor inactive are fixed.
-          // Thanks to SSA, a non-split interval starting in a hole of an
-          // inactive interval should never intersect with that inactive interval.
-          // Only if it's not fixed though, because fixed intervals don't come from SSA.
-          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-        } else {
-          size_t next_intersection = inactive->FirstIntersectionWith(current);
-          if (next_intersection != kNoLifetime) {
-            if (inactive->IsFixed()) {
-              LiveInterval* split = Split(current, next_intersection);
-              DCHECK_NE(split, current);
-              AddSorted(unhandled_, split);
-            } else {
-              // Split at the start of `current`, which will lead to splitting
-              // at the end of the lifetime hole of `inactive`.
-              LiveInterval* split = Split(inactive, current->GetStart());
-              // If it's inactive, it must start before the current interval.
-              DCHECK_NE(split, inactive);
-              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
-              erased = true;
-              handled_.push_back(inactive);
-              AddSorted(unhandled_, split);
-            }
-          }
-        }
-      }
-      // If we have erased the element, `it` already points to the next element.
-      // Otherwise we need to move to the next element.
-      if (!erased) {
-        ++it;
-      }
-    }
-
-    return true;
-  }
-}
-
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
-  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
-  size_t insert_at = 0;
-  for (size_t i = array->size(); i > 0; --i) {
-    LiveInterval* current = (*array)[i - 1u];
-    // High intervals must be processed right after their low equivalent.
-    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
-      insert_at = i;
-      break;
-    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
-      // Ensure the slow path interval is the last to be processed at its location: we want the
-      // interval to know all live registers at this location.
-      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
-      insert_at = i;
-      break;
-    }
-  }
-
-  // Insert the high interval before the low, to ensure the low is processed before.
-  auto insert_pos = array->begin() + insert_at;
-  if (interval->HasHighInterval()) {
-    array->insert(insert_pos, { interval->GetHighInterval(), interval });
-  } else if (interval->HasLowInterval()) {
-    array->insert(insert_pos, { interval, interval->GetLowInterval() });
-  } else {
-    array->insert(insert_pos, interval);
+    return new_interval;
   }
 }
 
@@ -1257,748 +279,4 @@
   return Split(interval, block_to->GetLifetimeStart());
 }
 
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
-  DCHECK_GE(position, interval->GetStart());
-  DCHECK(!interval->IsDeadAt(position));
-  if (position == interval->GetStart()) {
-    // Spill slot will be allocated when handling `interval` again.
-    interval->ClearRegister();
-    if (interval->HasHighInterval()) {
-      interval->GetHighInterval()->ClearRegister();
-    } else if (interval->HasLowInterval()) {
-      interval->GetLowInterval()->ClearRegister();
-    }
-    return interval;
-  } else {
-    LiveInterval* new_interval = interval->SplitAt(position);
-    if (interval->HasHighInterval()) {
-      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
-      new_interval->SetHighInterval(high);
-      high->SetLowInterval(new_interval);
-    } else if (interval->HasLowInterval()) {
-      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
-      new_interval->SetLowInterval(low);
-      low->SetHighInterval(new_interval);
-    }
-    return new_interval;
-  }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
-  if (interval->IsHighInterval()) {
-    // The low interval already took care of allocating the spill slot.
-    DCHECK(!interval->GetLowInterval()->HasRegister());
-    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
-    return;
-  }
-
-  LiveInterval* parent = interval->GetParent();
-
-  // An instruction gets a spill slot for its entire lifetime. If the parent
-  // of this interval already has a spill slot, there is nothing to do.
-  if (parent->HasSpillSlot()) {
-    return;
-  }
-
-  HInstruction* defined_by = parent->GetDefinedBy();
-  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
-
-  if (defined_by->IsParameterValue()) {
-    // Parameters have their own stack slot.
-    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
-    return;
-  }
-
-  if (defined_by->IsCurrentMethod()) {
-    parent->SetSpillSlot(0);
-    return;
-  }
-
-  if (defined_by->IsConstant()) {
-    // Constants don't need a spill slot.
-    return;
-  }
-
-  ArenaVector<size_t>* spill_slots = nullptr;
-  switch (interval->GetType()) {
-    case Primitive::kPrimDouble:
-      spill_slots = &double_spill_slots_;
-      break;
-    case Primitive::kPrimLong:
-      spill_slots = &long_spill_slots_;
-      break;
-    case Primitive::kPrimFloat:
-      spill_slots = &float_spill_slots_;
-      break;
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimShort:
-      spill_slots = &int_spill_slots_;
-      break;
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
-  }
-
-  // Find an available spill slot.
-  size_t slot = 0;
-  for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()
-        && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
-      break;
-    }
-  }
-
-  size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
-  }
-
-  // Note that the exact spill slot location will be computed when we resolve,
-  // that is when we know the number of spill slots for each type.
-  parent->SetSpillSlot(slot);
-}
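
A hedged, standalone sketch of the slot-reuse scan above, ignoring the two-slot (long/double) case: each entry of the per-type vector records the lifetime position up to which that slot is in use, so a slot whose recorded end is not after the new interval's start can be reused. FindOrAddSlot and the sample positions are made up for the example.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    static size_t FindOrAddSlot(std::vector<size_t>* slots,
                                size_t interval_start,
                                size_t interval_end) {
      size_t slot = 0;
      for (; slot < slots->size(); ++slot) {
        if ((*slots)[slot] <= interval_start) break;  // slot is free again: reuse it
      }
      if (slot == slots->size()) {
        slots->push_back(interval_end);               // grow the pool of typed slots
      } else {
        (*slots)[slot] = interval_end;                // busy until the interval's last use
      }
      return slot;
    }

    int main() {
      std::vector<size_t> int_slots = {18, 40};  // busy until positions 18 and 40
      std::printf("interval [20, 36) gets slot %zu\n", FindOrAddSlot(&int_slots, 20, 36));  // 0
      std::printf("interval [22, 50) gets slot %zu\n", FindOrAddSlot(&int_slots, 22, 50));  // 2
      return 0;
    }
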
-
-static bool IsValidDestination(Location destination) {
-  return destination.IsRegister()
-      || destination.IsRegisterPair()
-      || destination.IsFpuRegister()
-      || destination.IsFpuRegisterPair()
-      || destination.IsStackSlot()
-      || destination.IsDoubleStackSlot();
-}
-
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
-  LiveInterval* interval = phi->GetLiveInterval();
-
-  HInstruction* previous_phi = phi->GetPrevious();
-  DCHECK(previous_phi == nullptr ||
-         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
-      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
-
-  if (phi->IsVRegEquivalentOf(previous_phi)) {
-    // This is an equivalent of the previous phi. We need to assign the same
-    // catch phi slot.
-    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
-    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
-  } else {
-    // Allocate a new spill slot for this catch phi.
-    // TODO: Reuse spill slots when intervals of phis from different catch
-    //       blocks do not overlap.
-    interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
-  }
-}
-
-void RegisterAllocator::AddMove(HParallelMove* move,
-                                Location source,
-                                Location destination,
-                                HInstruction* instruction,
-                                Primitive::Type type) const {
-  if (type == Primitive::kPrimLong
-      && codegen_->ShouldSplitLongMoves()
-      // The parallel move resolver knows how to deal with long constants.
-      && !source.IsConstant()) {
-    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
-    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
-  } else {
-    move->AddMove(source, destination, type, instruction);
-  }
-}
-
-void RegisterAllocator::AddInputMoveFor(HInstruction* input,
-                                        HInstruction* user,
-                                        Location source,
-                                        Location destination) const {
-  if (source.Equals(destination)) return;
-
-  DCHECK(!user->IsPhi());
-
-  HInstruction* previous = user->GetPrevious();
-  HParallelMove* move = nullptr;
-  if (previous == nullptr
-      || !previous->IsParallelMove()
-      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(user->GetLifetimePosition());
-    user->GetBlock()->InsertInstructionBefore(move, user);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
-  AddMove(move, source, destination, nullptr, input->GetType());
-}
-
-static bool IsInstructionStart(size_t position) {
-  return (position & 1) == 0;
-}
-
-static bool IsInstructionEnd(size_t position) {
-  return (position & 1) == 1;
-}
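
These two helpers encode the convention used throughout the allocator: every instruction owns two lifetime positions, an even "start" and an odd "end", which is why callers divide a position by two to recover the instruction. A small standalone illustration:

    #include <cstddef>
    #include <cstdio>

    static bool IsStart(size_t position) { return (position & 1) == 0; }

    int main() {
      for (size_t position = 10; position <= 13; ++position) {
        std::printf("position %zu -> instruction %zu (%s)\n",
                    position, position / 2, IsStart(position) ? "start" : "end");
      }
      return 0;
    }
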
-
-void RegisterAllocator::InsertParallelMoveAt(size_t position,
-                                             HInstruction* instruction,
-                                             Location source,
-                                             Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
-  HParallelMove* move;
-  if (at == nullptr) {
-    if (IsInstructionStart(position)) {
-      // Block boundary, don't do anything: the connection of split siblings will handle it.
-      return;
-    } else {
-      // Move must happen before the first instruction of the block.
-      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
-      // Note that parallel moves may have already been inserted, so we explicitly
-      // ask for the first instruction of the block: `GetInstructionFromPosition` does
-      // not contain the `HParallelMove` instructions.
-      at = at->GetBlock()->GetFirstInstruction();
-
-      if (at->GetLifetimePosition() < position) {
-        // We may insert moves for split siblings and phi spills at the beginning of the block.
-        // Since this is a different lifetime position, we need to go to the next instruction.
-        DCHECK(at->IsParallelMove());
-        at = at->GetNext();
-      }
-
-      if (at->GetLifetimePosition() != position) {
-        DCHECK_GT(at->GetLifetimePosition(), position);
-        move = new (allocator_) HParallelMove(allocator_);
-        move->SetLifetimePosition(position);
-        at->GetBlock()->InsertInstructionBefore(move, at);
-      } else {
-        DCHECK(at->IsParallelMove());
-        move = at->AsParallelMove();
-      }
-    }
-  } else if (IsInstructionEnd(position)) {
-    // Move must happen after the instruction.
-    DCHECK(!at->IsControlFlow());
-    move = at->GetNext()->AsParallelMove();
-    // This is a parallel move for connecting siblings in the same block. We need to
-    // differentiate it from moves for connecting blocks, and from input moves.
-    if (move == nullptr || move->GetLifetimePosition() > position) {
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
-    }
-  } else {
-    // Move must happen before the instruction.
-    HInstruction* previous = at->GetPrevious();
-    if (previous == nullptr
-        || !previous->IsParallelMove()
-        || previous->GetLifetimePosition() != position) {
-      // If the previous is a parallel move, then its position must be lower
-      // than the given `position`: it was added just after the non-parallel
-      // move instruction that precedes `instruction`.
-      DCHECK(previous == nullptr
-             || !previous->IsParallelMove()
-             || previous->GetLifetimePosition() < position);
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at);
-    } else {
-      move = previous->AsParallelMove();
-    }
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), position);
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                                   HInstruction* instruction,
-                                                   Location source,
-                                                   Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
-  HInstruction* last = block->GetLastInstruction();
-  // We insert moves at exit for phi predecessors and connecting blocks.
-  // A block ending with an if or a packed switch cannot branch to a block
-  // with phis because we do not allow critical edges. It can also not connect
-  // a split interval between two blocks: the move has to happen in the successor.
-  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
-  HInstruction* previous = last->GetPrevious();
-  HParallelMove* move;
-  // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from output moves.
-  size_t position = last->GetLifetimePosition();
-  if (previous == nullptr || !previous->IsParallelMove()
-      || previous->AsParallelMove()->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, last);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                                    HInstruction* instruction,
-                                                    Location source,
-                                                    Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* first = block->GetFirstInstruction();
-  HParallelMove* move = first->AsParallelMove();
-  size_t position = block->GetLifetimeStart();
-  // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from input moves.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, first);
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
-                                        Location source,
-                                        Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  if (instruction->IsPhi()) {
-    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
-    return;
-  }
-
-  size_t position = instruction->GetLifetimePosition() + 1;
-  HParallelMove* move = instruction->GetNext()->AsParallelMove();
-  // This is a parallel move for moving the output of an instruction. We need
-  // to differentiate it from input moves, from moves for connecting siblings in
-  // the same block, and from moves for connecting blocks.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
-  LiveInterval* current = interval;
-  if (current->HasSpillSlot()
-      && current->HasRegister()
-      // Currently, we unconditionally spill the current method in the code generators.
-      && !interval->GetDefinedBy()->IsCurrentMethod()) {
-    // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
-  }
-  UsePosition* use = current->GetFirstUse();
-  UsePosition* env_use = current->GetFirstEnvironmentUse();
-
-  // Walk over all siblings, updating locations of use positions, and
-  // connecting them when they are adjacent.
-  do {
-    Location source = current->ToLocation();
-
-    // Walk over all uses covered by this interval, and update the location
-    // information.
-
-    LiveRange* range = current->GetFirstRange();
-    while (range != nullptr) {
-      while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->IsSynthesized());
-        use = use->GetNext();
-      }
-      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(!use->GetIsEnvironment());
-        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        if (!use->IsSynthesized()) {
-          LocationSummary* locations = use->GetUser()->GetLocations();
-          Location expected_location = locations->InAt(use->GetInputIndex());
-          // The expected (actual) location may be invalid in case the input is unused. Currently
-          // this only happens for intrinsics.
-          if (expected_location.IsValid()) {
-            if (expected_location.IsUnallocated()) {
-              locations->SetInAt(use->GetInputIndex(), source);
-            } else if (!expected_location.IsConstant()) {
-              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
-            }
-          } else {
-            DCHECK(use->GetUser()->IsInvoke());
-            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
-          }
-        }
-        use = use->GetNext();
-      }
-
-      // Walk over the environment uses, and update their locations.
-      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
-        env_use = env_use->GetNext();
-      }
-
-      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
-        DCHECK(current->CoversSlow(env_use->GetPosition())
-               || (env_use->GetPosition() == range->GetEnd()));
-        HEnvironment* environment = env_use->GetEnvironment();
-        environment->SetLocationAt(env_use->GetInputIndex(), source);
-        env_use = env_use->GetNext();
-      }
-
-      range = range->GetNext();
-    }
-
-    // If the next interval starts just after this one, and has a register,
-    // insert a move.
-    LiveInterval* next_sibling = current->GetNextSibling();
-    if (next_sibling != nullptr
-        && next_sibling->HasRegister()
-        && current->GetEnd() == next_sibling->GetStart()) {
-      Location destination = next_sibling->ToLocation();
-      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
-    }
-
-    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
-         safepoint_position != nullptr;
-         safepoint_position = safepoint_position->GetNext()) {
-      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
-
-      LocationSummary* locations = safepoint_position->GetLocations();
-      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
-        DCHECK(interval->GetDefinedBy()->IsActualObject())
-            << interval->GetDefinedBy()->DebugName()
-            << "@" << safepoint_position->GetInstruction()->DebugName();
-        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
-      }
-
-      switch (source.GetKind()) {
-        case Location::kRegister: {
-          locations->AddLiveRegister(source);
-          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
-            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
-                      maximum_number_of_live_core_registers_ +
-                      maximum_number_of_live_fp_registers_);
-          }
-          if (current->GetType() == Primitive::kPrimNot) {
-            DCHECK(interval->GetDefinedBy()->IsActualObject())
-                << interval->GetDefinedBy()->DebugName()
-                << "@" << safepoint_position->GetInstruction()->DebugName();
-            locations->SetRegisterBit(source.reg());
-          }
-          break;
-        }
-        case Location::kFpuRegister: {
-          locations->AddLiveRegister(source);
-          break;
-        }
-
-        case Location::kRegisterPair:
-        case Location::kFpuRegisterPair: {
-          locations->AddLiveRegister(source.ToLow());
-          locations->AddLiveRegister(source.ToHigh());
-          break;
-        }
-        case Location::kStackSlot:  // Fall-through
-        case Location::kDoubleStackSlot:  // Fall-through
-        case Location::kConstant: {
-          // Nothing to do.
-          break;
-        }
-        default: {
-          LOG(FATAL) << "Unexpected location for object";
-        }
-      }
-    }
-    current = next_sibling;
-  } while (current != nullptr);
-
-  if (kIsDebugBuild) {
-    // Following uses can only be synthesized uses.
-    while (use != nullptr) {
-      DCHECK(use->IsSynthesized());
-      use = use->GetNext();
-    }
-  }
-}
-
-static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
-    HInstruction* instruction) {
-  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
-         (instruction->IsConstant() || instruction->IsCurrentMethod());
-}
-
-void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
-                                             HBasicBlock* from,
-                                             HBasicBlock* to) const {
-  if (interval->GetNextSibling() == nullptr) {
-    // Nothing to connect. The whole range was allocated to the same location.
-    return;
-  }
-
-  // Find the intervals that cover `from` and `to`.
-  size_t destination_position = to->GetLifetimeStart();
-  size_t source_position = from->GetLifetimeEnd() - 1;
-  LiveInterval* destination = interval->GetSiblingAt(destination_position);
-  LiveInterval* source = interval->GetSiblingAt(source_position);
-
-  if (destination == source) {
-    // Interval was not split.
-    return;
-  }
-
-  LiveInterval* parent = interval->GetParent();
-  HInstruction* defined_by = parent->GetDefinedBy();
-  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
-      (destination == nullptr || !destination->CoversSlow(destination_position))) {
-    // Our live_in fixed point calculation has found that the instruction is live
-    // in the `to` block because it will eventually enter an irreducible loop. Our
-    // live interval computation however does not compute a fixed point, and
-    // therefore will not have a location for that instruction for `to`.
-    // Because the instruction is a constant or the ArtMethod, we don't need to
-    // do anything: it will be materialized in the irreducible loop.
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
-        << defined_by->DebugName() << ":" << defined_by->GetId()
-        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
-    return;
-  }
-
-  if (!destination->HasRegister()) {
-    // Values are eagerly spilled. Spill slot already contains appropriate value.
-    return;
-  }
-
-  Location location_source;
-  // `GetSiblingAt` returns the interval whose start and end cover `position`,
-  // but does not check whether the interval is inactive at that position.
-  // The only situation where the interval is inactive at that position is in the
-  // presence of irreducible loops for constants and ArtMethod.
-  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
-      (source == nullptr || !source->CoversSlow(source_position))) {
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
-    if (defined_by->IsConstant()) {
-      location_source = defined_by->GetLocations()->Out();
-    } else {
-      DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
-    }
-  } else {
-    DCHECK(source != nullptr);
-    DCHECK(source->CoversSlow(source_position));
-    DCHECK(destination->CoversSlow(destination_position));
-    location_source = source->ToLocation();
-  }
-
-  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
-  // we need to put the moves at the entry of `to`.
-  if (from->GetNormalSuccessors().size() == 1) {
-    InsertParallelMoveAtExitOf(from,
-                               defined_by,
-                               location_source,
-                               destination->ToLocation());
-  } else {
-    DCHECK_EQ(to->GetPredecessors().size(), 1u);
-    InsertParallelMoveAtEntryOf(to,
-                                defined_by,
-                                location_source,
-                                destination->ToLocation());
-  }
-}
-
-void RegisterAllocator::Resolve() {
-  codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
-                                     maximum_number_of_live_core_registers_,
-                                     maximum_number_of_live_fp_registers_,
-                                     reserved_out_slots_,
-                                     codegen_->GetGraph()->GetLinearOrder());
-
-  // Adjust the Out Location of instructions.
-  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    LiveInterval* current = instruction->GetLiveInterval();
-    LocationSummary* locations = instruction->GetLocations();
-    Location location = locations->Out();
-    if (instruction->IsParameterValue()) {
-      // Now that we know the frame size, adjust the parameter's location.
-      if (location.IsStackSlot()) {
-        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (location.IsDoubleStackSlot()) {
-        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (current->HasSpillSlot()) {
-        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
-      }
-    } else if (instruction->IsCurrentMethod()) {
-      // The current method is always at offset 0.
-      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
-    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-      DCHECK(current->HasSpillSlot());
-      size_t slot = current->GetSpillSlot()
-                    + GetNumberOfSpillSlots()
-                    + reserved_out_slots_
-                    - catch_phi_spill_slots_;
-      current->SetSpillSlot(slot * kVRegSize);
-    } else if (current->HasSpillSlot()) {
-      // Adjust the stack slot, now that we know the number of them for each type.
-      // The way this implementation lays out the stack is the following:
-      // [parameter slots       ]
-      // [catch phi spill slots ]
-      // [double spill slots    ]
-      // [long spill slots      ]
-      // [float spill slots     ]
-      // [int/ref values        ]
-      // [maximum out values    ] (number of arguments for calls)
-      // [art method            ].
-      size_t slot = current->GetSpillSlot();
-      switch (current->GetType()) {
-        case Primitive::kPrimDouble:
-          slot += long_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimLong:
-          slot += float_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimFloat:
-          slot += int_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimNot:
-        case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
-        case Primitive::kPrimByte:
-        case Primitive::kPrimBoolean:
-        case Primitive::kPrimShort:
-          slot += reserved_out_slots_;
-          break;
-        case Primitive::kPrimVoid:
-          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
-      }
-      current->SetSpillSlot(slot * kVRegSize);
-    }
-
-    Location source = current->ToLocation();
-
-    if (location.IsUnallocated()) {
-      if (location.GetPolicy() == Location::kSameAsFirstInput) {
-        if (locations->InAt(0).IsUnallocated()) {
-          locations->SetInAt(0, source);
-        } else {
-          DCHECK(locations->InAt(0).Equals(source));
-        }
-      }
-      locations->UpdateOut(source);
-    } else {
-      DCHECK(source.Equals(location));
-    }
-  }
-
-  // Connect siblings.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    ConnectSiblings(instruction->GetLiveInterval());
-  }
-
-  // Resolve non-linear control flow across branches. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (block->IsCatchBlock() ||
-        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
-      // Instructions live at the top of catch blocks or irreducible loop header
-      // were forced to spill.
-      if (kIsDebugBuild) {
-        BitVector* live = liveness_.GetLiveInSet(*block);
-        for (uint32_t idx : live->Indexes()) {
-          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-          LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
-          // `GetSiblingAt` returns the sibling that contains a position, but there could be
-          // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
-          // position.
-          if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
-            DCHECK(!sibling->HasRegister());
-          }
-        }
-      }
-    } else {
-      BitVector* live = liveness_.GetLiveInSet(*block);
-      for (uint32_t idx : live->Indexes()) {
-        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-        for (HBasicBlock* predecessor : block->GetPredecessors()) {
-          ConnectSplitSiblings(interval, predecessor, block);
-        }
-      }
-    }
-  }
-
-  // Resolve phi inputs. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsCatchBlock()) {
-      // Catch phi values are set at runtime by the exception delivery mechanism.
-    } else {
-      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* phi = inst_it.Current();
-        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
-          HBasicBlock* predecessor = current->GetPredecessors()[i];
-          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
-          HInstruction* input = phi->InputAt(i);
-          Location source = input->GetLiveInterval()->GetLocationAt(
-              predecessor->GetLifetimeEnd() - 1);
-          Location destination = phi->GetLiveInterval()->ToLocation();
-          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
-        }
-      }
-    }
-  }
-
-  // Assign temp locations.
-  for (LiveInterval* temp : temp_intervals_) {
-    if (temp->IsHighInterval()) {
-      // High intervals can be skipped, they are already handled by the low interval.
-      continue;
-    }
-    HInstruction* at = liveness_.GetTempUser(temp);
-    size_t temp_index = liveness_.GetTempIndex(temp);
-    LocationSummary* locations = at->GetLocations();
-    switch (temp->GetType()) {
-      case Primitive::kPrimInt:
-        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
-        break;
-
-      case Primitive::kPrimDouble:
-        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-          Location location = Location::FpuRegisterPairLocation(
-              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
-          locations->SetTempAt(temp_index, location);
-        } else {
-          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
-        }
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type for temporary location "
-                   << temp->GetType();
-    }
-  }
-}
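
The fall-through switch in Resolve walks the stack layout documented above: a slot index is shifted past every category laid out below it, then scaled by the vreg size. A hedged worked example with made-up slot counts (the numbers are illustrative only):

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t kVRegSize = 4;      // one virtual register slot is 4 bytes
      size_t reserved_out_slots = 3;   // hypothetical: outgoing args + art method
      size_t int_slots = 2;
      size_t float_slots = 1;
      size_t long_slots = 2;

      // A float interval in slot 0 sits above the int/ref slots and the out slots.
      size_t float_slot0 = (0 + int_slots + reserved_out_slots) * kVRegSize;
      // A double interval in slot 0 additionally skips the long and float slots.
      size_t double_slot0 =
          (0 + long_slots + float_slots + int_slots + reserved_out_slots) * kVRegSize;

      std::printf("float slot 0 -> byte offset %zu\n", float_slot0);    // 20
      std::printf("double slot 0 -> byte offset %zu\n", double_slot0);  // 32
      return 0;
    }
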
-
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 58600b7..7e1fff8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_containers.h"
+#include "base/arena_object.h"
 #include "base/macros.h"
 #include "primitive.h"
 
@@ -29,36 +30,41 @@
 class HGraph;
 class HInstruction;
 class HParallelMove;
-class HPhi;
 class LiveInterval;
 class Location;
 class SsaLivenessAnalysis;
 
 /**
- * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ * Base class for any register allocator.
  */
-class RegisterAllocator {
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
  public:
-  RegisterAllocator(ArenaAllocator* allocator,
-                    CodeGenerator* codegen,
-                    const SsaLivenessAnalysis& analysis);
+  enum Strategy {
+    kRegisterAllocatorLinearScan,
+    kRegisterAllocatorGraphColor
+  };
+
+  static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
+  static RegisterAllocator* Create(ArenaAllocator* allocator,
+                                   CodeGenerator* codegen,
+                                   const SsaLivenessAnalysis& analysis,
+                                   Strategy strategy = kRegisterAllocatorDefault);
+
+  virtual ~RegisterAllocator() = default;
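
With the class turned into a base class, callers go through the factory instead of constructing a linear-scan allocator directly. A hedged usage fragment (not from the tree) that assumes an ArenaAllocator `arena`, a CodeGenerator `codegen` and a completed SsaLivenessAnalysis `liveness` already exist in the caller:

    // Sketch only: the objects passed in are assumed to be set up elsewhere.
    RegisterAllocator* register_allocator = RegisterAllocator::Create(
        arena, codegen, liveness, RegisterAllocator::kRegisterAllocatorGraphColor);
    register_allocator->AllocateRegisters();
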
 
   // Main entry point for the register allocator. Given the liveness analysis,
   // allocates registers to live intervals.
-  void AllocateRegisters();
+  virtual void AllocateRegisters() = 0;
 
   // Validate that the register allocator did not allocate the same register to
-  // intervals that intersect each other. Returns false if it did not.
-  bool Validate(bool log_fatal_on_failure) {
-    processing_core_registers_ = true;
-    if (!ValidateInternal(log_fatal_on_failure)) {
-      return false;
-    }
-    processing_core_registers_ = false;
-    return ValidateInternal(log_fatal_on_failure);
-  }
+  // intervals that intersect each other. Returns false if it failed.
+  virtual bool Validate(bool log_fatal_on_failure) = 0;
 
-  // Helper method for validation. Used by unit testing.
+  static bool CanAllocateRegistersFor(const HGraph& graph,
+                                      InstructionSet instruction_set);
+
+  // Verifies that live intervals do not conflict. Used by unit testing.
   static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                 size_t number_of_spill_slots,
                                 size_t number_of_out_slots,
@@ -67,178 +73,25 @@
                                 bool processing_core_registers,
                                 bool log_fatal_on_failure);
 
-  static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
-  size_t GetNumberOfSpillSlots() const {
-    return int_spill_slots_.size()
-        + long_spill_slots_.size()
-        + float_spill_slots_.size()
-        + double_spill_slots_.size()
-        + catch_phi_spill_slots_;
-  }
-
   static constexpr const char* kRegisterAllocatorPassName = "register";
 
- private:
-  // Main methods of the allocator.
-  void LinearScan();
-  bool TryAllocateFreeReg(LiveInterval* interval);
-  bool AllocateBlockedReg(LiveInterval* interval);
-  void Resolve();
-
-  // Add `interval` in the given sorted list.
-  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+ protected:
+  RegisterAllocator(ArenaAllocator* allocator,
+                    CodeGenerator* codegen,
+                    const SsaLivenessAnalysis& analysis);
 
   // Split `interval` at the position `position`. The new interval starts at `position`.
-  LiveInterval* Split(LiveInterval* interval, size_t position);
+  // If `position` is at the start of `interval`, returns `interval` with its
+  // register location(s) cleared.
+  static LiveInterval* Split(LiveInterval* interval, size_t position);
 
   // Split `interval` at a position between `from` and `to`. The method will try
   // to find an optimal split position.
   LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
 
-  // Returns whether `reg` is blocked by the code generator.
-  bool IsBlocked(int reg) const;
-
-  // Update the interval for the register in `location` to cover [start, end).
-  void BlockRegister(Location location, size_t start, size_t end);
-  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
-  // Allocate a spill slot for the given interval. Should be called in linear
-  // order of interval starting positions.
-  void AllocateSpillSlotFor(LiveInterval* interval);
-
-  // Allocate a spill slot for the given catch phi. Will allocate the same slot
-  // for phis which share the same vreg. Must be called in reverse linear order
-  // of lifetime positions and ascending vreg numbers for correctness.
-  void AllocateSpillSlotForCatchPhi(HPhi* phi);
-
-  // Connect adjacent siblings within blocks.
-  void ConnectSiblings(LiveInterval* interval);
-
-  // Connect siblings between block entries and exits.
-  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
-
-  // Helper methods to insert parallel moves in the graph.
-  void InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                  HInstruction* instruction,
-                                  Location source,
-                                  Location destination) const;
-  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                   HInstruction* instruction,
-                                   Location source,
-                                   Location destination) const;
-  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
-  void AddInputMoveFor(HInstruction* input,
-                       HInstruction* user,
-                       Location source,
-                       Location destination) const;
-  void InsertParallelMoveAt(size_t position,
-                            HInstruction* instruction,
-                            Location source,
-                            Location destination) const;
-
-  void AddMove(HParallelMove* move,
-               Location source,
-               Location destination,
-               HInstruction* instruction,
-               Primitive::Type type) const;
-
-  // Helper methods.
-  void AllocateRegistersInternal();
-  void ProcessInstruction(HInstruction* instruction);
-  bool ValidateInternal(bool log_fatal_on_failure) const;
-  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
-  void DumpAllIntervals(std::ostream& stream) const;
-  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
-  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
-  bool IsCallerSaveRegister(int reg) const;
-
-  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
-  // Returns whether it was successful at finding such an interval.
-  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                size_t first_register_use,
-                                                size_t* next_use);
-
   ArenaAllocator* const allocator_;
   CodeGenerator* const codegen_;
   const SsaLivenessAnalysis& liveness_;
-
-  // List of intervals for core registers that must be processed, ordered by start
-  // position. Last entry is the interval that has the lowest start position.
-  // This list is initially populated before doing the linear scan.
-  ArenaVector<LiveInterval*> unhandled_core_intervals_;
-
-  // List of intervals for floating-point registers. Same comments as above.
-  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
-
-  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
-  // or `unhandled_fp_intervals_`.
-  ArenaVector<LiveInterval*>* unhandled_;
-
-  // List of intervals that have been processed.
-  ArenaVector<LiveInterval*> handled_;
-
-  // List of intervals that are currently active when processing a new live interval.
-  // That is, they have a live range that spans the start of the new interval.
-  ArenaVector<LiveInterval*> active_;
-
-  // List of intervals that are currently inactive when processing a new live interval.
-  // That is, they have a lifetime hole that spans the start of the new interval.
-  ArenaVector<LiveInterval*> inactive_;
-
-  // Fixed intervals for physical registers. Such intervals cover the positions
-  // where an instruction requires a specific register.
-  ArenaVector<LiveInterval*> physical_core_register_intervals_;
-  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
-
-  // Intervals for temporaries. Such intervals cover the positions
-  // where an instruction requires a temporary.
-  ArenaVector<LiveInterval*> temp_intervals_;
-
-  // The spill slots allocated for live intervals. We ensure spill slots
-  // are typed to avoid (1) doing moves and swaps between two different kinds
-  // of registers, and (2) swapping between a single stack slot and a double
-  // stack slot. This simplifies the parallel move resolver.
-  ArenaVector<size_t> int_spill_slots_;
-  ArenaVector<size_t> long_spill_slots_;
-  ArenaVector<size_t> float_spill_slots_;
-  ArenaVector<size_t> double_spill_slots_;
-
-  // Spill slots allocated to catch phis. This category is special-cased because
-  // (1) slots are allocated prior to linear scan and in reverse linear order,
-  // (2) equivalent phis need to share slots despite having different types.
-  size_t catch_phi_spill_slots_;
-
-  // Instructions that need a safepoint.
-  ArenaVector<HInstruction*> safepoints_;
-
-  // True if processing core registers. False if processing floating
-  // point registers.
-  bool processing_core_registers_;
-
-  // Number of registers for the current register kind (core or floating point).
-  size_t number_of_registers_;
-
-  // Temporary array, allocated ahead of time for simplicity.
-  size_t* registers_array_;
-
-  // Blocked registers, as decided by the code generator.
-  bool* const blocked_core_registers_;
-  bool* const blocked_fp_registers_;
-
-  // Slots reserved for out arguments.
-  size_t reserved_out_slots_;
-
-  // The maximum live core registers at safepoints.
-  size_t maximum_number_of_live_core_registers_;
-
-  // The maximum live FP registers at safepoints.
-  size_t maximum_number_of_live_fp_registers_;
-
-  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
-
-  DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
new file mode 100644
index 0000000..a21595f
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -0,0 +1,2105 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_graph_color.h"
+
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Highest number of registers that we support for any platform. This can be used for std::bitset,
+// for example, which needs to know its size at compile time.
+static constexpr size_t kMaxNumRegs = 32;
+
+// The maximum number of graph coloring attempts before triggering a DCHECK.
+// This is meant to catch changes to the graph coloring algorithm that undermine its forward
+// progress guarantees. Forward progress for the algorithm means splitting live intervals on
+// every graph coloring attempt so that eventually the interference graph will be sparse enough
+// to color. The main threat to forward progress is trying to split short intervals which cannot be
+// split further; this could cause infinite looping because the interference graph would never
+// change. This is avoided by prioritizing short intervals before long ones, so that long
+// intervals are split when coloring fails.
+static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
+
+// We always want to avoid spilling inside loops.
+static constexpr size_t kLoopSpillWeightMultiplier = 10;
+
+// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
+static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
+
+// We avoid moves in blocks that dominate the exit block, since these blocks will
+// be executed on every path through the method.
+static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
+
+enum class CoalesceKind {
+  kAdjacentSibling,       // Prevents moves at interval split points.
+  kFixedOutputSibling,    // Prevents moves from a fixed output location.
+  kFixedInput,            // Prevents moves into a fixed input location.
+  kNonlinearControlFlow,  // Prevents moves between blocks.
+  kPhi,                   // Prevents phi resolution moves.
+  kFirstInput,            // Prevents a single input move.
+  kAnyInput,              // May lead to better instruction selection / smaller encodings.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
+  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
+}
+
+static size_t LoopDepthAt(HBasicBlock* block) {
+  HLoopInformation* loop_info = block->GetLoopInformation();
+  size_t depth = 0;
+  while (loop_info != nullptr) {
+    ++depth;
+    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
+  }
+  return depth;
+}
+
+// Return the runtime cost of inserting a move instruction at the specified location.
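+// For example, a move in a block at loop depth 2 that also dominates the exit block
+// has cost 1 * 2 * 10 * 10 = 200.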
+static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
+  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
+  DCHECK(block != nullptr);
+  size_t cost = 1;
+  if (block->IsSingleJump()) {
+    cost *= kSingleJumpBlockWeightMultiplier;
+  }
+  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
+    cost *= kDominatesExitBlockWeightMultiplier;
+  }
+  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
+    cost *= kLoopSpillWeightMultiplier;
+  }
+  return cost;
+}
+
+// In general, we estimate coalesce priority by whether it will definitely avoid a move,
+// and by how likely it is to create an interference graph that's harder to color.
+static size_t ComputeCoalescePriority(CoalesceKind kind,
+                                      size_t position,
+                                      const SsaLivenessAnalysis& liveness) {
+  if (kind == CoalesceKind::kAnyInput) {
+    // This type of coalescing can affect instruction selection, but not moves, so we
+    // give it the lowest priority.
+    return 0;
+  } else {
+    return CostForMoveAt(position, liveness);
+  }
+}
+
+enum class CoalesceStage {
+  kWorklist,  // Currently in the iterative coalescing worklist.
+  kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
+  kInactive,  // No longer considered until last-chance coalescing.
+  kDefunct,   // Either the two nodes interfere, or have already been coalesced.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
+  return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
+}
+
+// Represents a coalesce opportunity between two nodes.
+struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
+  CoalesceOpportunity(InterferenceNode* a,
+                      InterferenceNode* b,
+                      CoalesceKind kind,
+                      size_t position,
+                      const SsaLivenessAnalysis& liveness)
+        : node_a(a),
+          node_b(b),
+          stage(CoalesceStage::kWorklist),
+          priority(ComputeCoalescePriority(kind, position, liveness)) {}
+
+  // Compare two coalesce opportunities based on their priority.
+  // Return true if lhs has a lower priority than that of rhs.
+  static bool CmpPriority(const CoalesceOpportunity* lhs,
+                          const CoalesceOpportunity* rhs) {
+    return lhs->priority < rhs->priority;
+  }
+
+  InterferenceNode* const node_a;
+  InterferenceNode* const node_b;
+
+  // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
+  // and whether it should still be considered.
+  CoalesceStage stage;
+
+  // The priority of this coalesce opportunity, based on heuristics.
+  const size_t priority;
+};
+
+enum class NodeStage {
+  kInitial,           // Uninitialized.
+  kPrecolored,        // Marks fixed nodes.
+  kSafepoint,         // Marks safepoint nodes.
+  kPrunable,          // Marks uncolored nodes in the interference graph.
+  kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
+  kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
+  kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
+  kPruned             // Marks nodes already pruned from the interference graph.
+};
+
+std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
+  return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
+}
+
+// Returns the estimated cost of spilling a particular live interval.
+static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
+  if (interval->HasRegister()) {
+    // Intervals with a fixed register cannot be spilled.
+    return std::numeric_limits<float>::min();
+  }
+
+  size_t length = interval->GetLength();
+  if (length == 1) {
+    // Tiny intervals should have maximum priority, since they cannot be split any further.
+    return std::numeric_limits<float>::max();
+  }
+
+  size_t use_weight = 0;
+  if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
+    // Cost for spilling at a register definition point.
+    use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
+  }
+
+  UsePosition* use = interval->GetFirstUse();
+  while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
+    // Skip uses before the start of this live interval.
+    use = use->GetNext();
+  }
+
+  while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
+    if (use->GetUser() != nullptr && use->RequiresRegister()) {
+      // Cost for spilling at a register use point.
+      use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
+    }
+    use = use->GetNext();
+  }
+
+  // We divide by the length of the interval because we want to prioritize
+  // short intervals; we do not benefit much if we split them further.
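+  // For example, an interval of length 4 whose only register requirement is a single use
+  // in a block outside any loop (and not dominating the exit) gets weight 1 / 4 = 0.25.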
+  return static_cast<float>(use_weight) / static_cast<float>(length);
+}
+
+// Interference nodes make up the interference graph, which is the primary data structure in
+// graph coloring register allocation. Each node represents a single live interval, and contains
+// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
+// pre-colored nodes never contain outgoing edges (only incoming ones).
+//
+// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
+// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
+//
+// Note that a pair interval is represented by a single node in the interference graph, which
+// essentially requires two colors. One consequence of this is that the degree of a node is not
+// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
+// number of colors with which a node could interfere. We model this by giving edges different
+// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
+// For example, the edge between two single nodes will have weight 1. On the other hand,
+// the edge between a single node and a pair node will have weight 2. This is because the pair
+// node could block up to two colors for the single node, and because the single node could
+// block an entire two-register aligned slot for the pair node.
+// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
+// and thus whether it is safe to prune it from the interference graph early on.
+class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+  InterferenceNode(ArenaAllocator* allocator,
+                   LiveInterval* interval,
+                   const SsaLivenessAnalysis& liveness)
+        : stage(NodeStage::kInitial),
+          interval_(interval),
+          adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
+          alias_(this),
+          spill_weight_(ComputeSpillWeight(interval, liveness)),
+          requires_color_(interval->RequiresRegister()),
+          needs_spill_slot_(false) {
+    DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
+  }
+
+  void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
+    DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
+    DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
+    DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
+    DCHECK_NE(stage, NodeStage::kPruned);
+    DCHECK_NE(other->stage, NodeStage::kPruned);
+    if (guaranteed_not_interfering_yet) {
+      DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
+             == adjacent_nodes_.end());
+      adjacent_nodes_.push_back(other);
+      out_degree_ += EdgeWeightWith(other);
+    } else {
+      auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+      if (it == adjacent_nodes_.end()) {
+        adjacent_nodes_.push_back(other);
+        out_degree_ += EdgeWeightWith(other);
+      }
+    }
+  }
+
+  void RemoveInterference(InterferenceNode* other) {
+    DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
+    DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
+    auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+    if (it != adjacent_nodes_.end()) {
+      adjacent_nodes_.erase(it);
+      out_degree_ -= EdgeWeightWith(other);
+    }
+  }
+
+  bool ContainsInterference(InterferenceNode* other) const {
+    DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
+    DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
+    auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+    return it != adjacent_nodes_.end();
+  }
+
+  LiveInterval* GetInterval() const {
+    return interval_;
+  }
+
+  const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
+    return adjacent_nodes_;
+  }
+
+  size_t GetOutDegree() const {
+    // Pre-colored nodes have infinite degree.
+    DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
+    return out_degree_;
+  }
+
+  void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
+    coalesce_opportunities_.push_back(opportunity);
+  }
+
+  void ClearCoalesceOpportunities() {
+    coalesce_opportunities_.clear();
+  }
+
+  bool IsMoveRelated() const {
+    for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
+      if (opportunity->stage == CoalesceStage::kWorklist ||
+          opportunity->stage == CoalesceStage::kActive) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Return whether this node already has a color.
+  // Used to find fixed nodes in the interference graph before coloring.
+  bool IsPrecolored() const {
+    return interval_->HasRegister();
+  }
+
+  bool IsPair() const {
+    return interval_->HasHighInterval();
+  }
+
+  void SetAlias(InterferenceNode* rep) {
+    DCHECK_NE(rep->stage, NodeStage::kPruned);
+    DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
+    alias_ = rep;
+  }
+
+  InterferenceNode* GetAlias() {
+    if (alias_ != this) {
+      // Recurse in order to flatten tree of alias pointers.
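+      // (This is the path compression familiar from union-find.)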
+      alias_ = alias_->GetAlias();
+    }
+    return alias_;
+  }
+
+  const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
+    return coalesce_opportunities_;
+  }
+
+  float GetSpillWeight() const {
+    return spill_weight_;
+  }
+
+  bool RequiresColor() const {
+    return requires_color_;
+  }
+
+  // We give extra weight to edges adjacent to pair nodes. See the general comment on the
+  // interference graph above.
+  size_t EdgeWeightWith(const InterferenceNode* other) const {
+    return (IsPair() || other->IsPair()) ? 2 : 1;
+  }
+
+  bool NeedsSpillSlot() const {
+    return needs_spill_slot_;
+  }
+
+  void SetNeedsSpillSlot() {
+    needs_spill_slot_ = true;
+  }
+
+  // The current stage of this node, indicating which worklist it belongs to.
+  NodeStage stage;
+
+ private:
+  // The live interval that this node represents.
+  LiveInterval* const interval_;
+
+  // All nodes interfering with this one.
+  // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
+  // set sizes that we encounter. Using a vector leads to much better performance.
+  ArenaVector<InterferenceNode*> adjacent_nodes_;
+
+  // Interference nodes that this node should be coalesced with to reduce moves.
+  ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
+
+  // The maximum number of colors with which this node could interfere. This could be more than
+  // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
+  // We use "out" degree because incoming edges come from nodes already pruned from the graph,
+  // and do not affect the coloring of this node.
+  // Pre-colored nodes are treated as having infinite degree.
+  size_t out_degree_;
+
+  // The node representing this node in the interference graph.
+  // Initially set to `this`, and only changed if this node is coalesced into another.
+  InterferenceNode* alias_;
+
+  // The cost of splitting and spilling this interval to the stack.
+  // Nodes with a higher spill weight should be prioritized when assigning registers.
+  // This is essentially based on use density and location; short intervals with many uses inside
+  // deeply nested loops have a high spill weight.
+  const float spill_weight_;
+
+  const bool requires_color_;
+
+  bool needs_spill_slot_;
+
+  DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
+};
+
+// The order in which we color nodes is important. To guarantee forward progress,
+// we prioritize intervals that require registers, and after that we prioritize
+// short intervals. That way, if we fail to color a node, it either won't require a
+// register, or it will be a long interval that can be split in order to make the
+// interference graph sparser.
+// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
+// (This metric is secondary to the forward progress requirements above.)
+// TODO: May also want to consider:
+// - Constants (since they can be rematerialized)
+// - Allocated spill slots
+static bool HasGreaterNodePriority(const InterferenceNode* lhs,
+                                   const InterferenceNode* rhs) {
+  // (1) Prioritize the node that requires a color.
+  if (lhs->RequiresColor() != rhs->RequiresColor()) {
+    return lhs->RequiresColor();
+  }
+
+  // (2) Prioritize the interval that has a higher spill weight.
+  return lhs->GetSpillWeight() > rhs->GetSpillWeight();
+}
+
+// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
+// and provides methods for each phase of the attempt.
+class ColoringIteration {
+ public:
+  ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
+                    ArenaAllocator* allocator,
+                    bool processing_core_regs,
+                    size_t num_regs)
+        : register_allocator_(register_allocator),
+          allocator_(allocator),
+          processing_core_regs_(processing_core_regs),
+          num_regs_(num_regs),
+          interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+          spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
+          coalesce_worklist_(CoalesceOpportunity::CmpPriority,
+                             allocator->Adapter(kArenaAllocRegisterAllocator)) {}
+
+  // Use the intervals collected from instructions to construct an
+  // interference graph mapping intervals to adjacency lists.
+  // Also, collect synthesized safepoint nodes, used to keep
+  // track of live intervals across safepoints.
+  // TODO: Should build safepoints elsewhere.
+  void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+                              const ArenaVector<InterferenceNode*>& physical_nodes,
+                              ArenaVector<InterferenceNode*>* safepoints);
+
+  // Add coalesce opportunities to interference nodes.
+  void FindCoalesceOpportunities();
+
+  // Prune nodes from the interference graph to be colored later. Build
+  // a stack (pruned_nodes) containing these intervals in an order determined
+  // by various heuristics.
+  void PruneInterferenceGraph();
+
+  // Process pruned_nodes_ to color the interference graph, spilling when
+  // necessary. Returns true if successful. Else, some intervals have been
+  // split, and the interference graph should be rebuilt for another attempt.
+  bool ColorInterferenceGraph();
+
+  // Return prunable nodes.
+  // The register allocator will need to access prunable nodes after coloring
+  // in order to tell the code generator which registers have been assigned.
+  const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+    return prunable_nodes_;
+  }
+
+ private:
+  // Create a coalesce opportunity between two nodes.
+  void CreateCoalesceOpportunity(InterferenceNode* a,
+                                 InterferenceNode* b,
+                                 CoalesceKind kind,
+                                 size_t position);
+
+  // Add an edge in the interference graph, if valid.
+  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+  // when possible.
+  void AddPotentialInterference(InterferenceNode* from,
+                                InterferenceNode* to,
+                                bool guaranteed_not_interfering_yet,
+                                bool both_directions = true);
+
+  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+  // may be pruned from the interference graph.
+  void FreezeMoves(InterferenceNode* node);
+
+  // Prune a node from the interference graph, updating worklists if necessary.
+  void PruneNode(InterferenceNode* node);
+
+  // Add coalesce opportunities associated with this node to the coalesce worklist.
+  void EnableCoalesceOpportunities(InterferenceNode* node);
+
+  // If needed, move `node` from the freeze worklist to the simplify worklist.
+  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
+  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+  void Coalesce(CoalesceOpportunity* opportunity);
+
+  // Merge `from` into `into` in the interference graph.
+  void Combine(InterferenceNode* from, InterferenceNode* into);
+
+  // A reference to the register allocator instance,
+  // needed to split intervals and assign spill slots.
+  RegisterAllocatorGraphColor* register_allocator_;
+
+  // An arena allocator used for a single graph coloring attempt.
+  ArenaAllocator* allocator_;
+
+  const bool processing_core_regs_;
+
+  const size_t num_regs_;
+
+  // A map from live intervals to interference nodes.
+  ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+  // Uncolored nodes that should be pruned from the interference graph.
+  ArenaVector<InterferenceNode*> prunable_nodes_;
+
+  // A stack of nodes pruned from the interference graph, waiting to be pruned.
+  ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+  ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+  // A queue containing low degree, move-related nodes.
+  ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+  // A queue containing high degree nodes.
+  // If we have to prune from the spill worklist, we cannot guarantee
+  // the pruned node a color, so we order the worklist by priority.
+  ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+  // A queue containing coalesce opportunities.
+  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+  // inside of loops) are more important than others.
+  ArenaPriorityQueue<CoalesceOpportunity*,
+                     decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
+
+  DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
+};
+
+static bool IsCoreInterval(LiveInterval* interval) {
+  return !Primitive::IsFloatingPointType(interval->GetType());
+}
+
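+// Number of vreg-sized stack slots reserved for the ArtMethod* at the bottom of the frame,
+// e.g. two slots on a 64-bit target, assuming 4-byte vregs.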
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
+  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness,
+                                                         bool iterative_move_coalescing)
+      : RegisterAllocator(allocator, codegen, liveness),
+        iterative_move_coalescing_(iterative_move_coalescing),
+        core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        num_int_spill_slots_(0),
+        num_double_spill_slots_(0),
+        num_float_spill_slots_(0),
+        num_long_spill_slots_(0),
+        catch_phi_spill_slot_counter_(0),
+        reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+        reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
+        number_of_globally_blocked_core_regs_(0),
+        number_of_globally_blocked_fp_regs_(0),
+        max_safepoint_live_core_regs_(0),
+        max_safepoint_live_fp_regs_(0) {
+  // Before we ask for blocked registers, set them up in the code generator.
+  codegen->SetupBlockedRegisters();
+
+  // Initialize physical core register live intervals and blocked registers.
+  // This includes globally blocked registers, such as the stack pointer.
+  physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
+    physical_core_nodes_[i] =
+        new (allocator_) InterferenceNode(allocator_, interval, liveness);
+    physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
+    core_intervals_.push_back(interval);
+    if (codegen_->IsBlockedCoreRegister(i)) {
+      ++number_of_globally_blocked_core_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+  // Initialize physical floating point register live intervals and blocked registers.
+  physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
+    physical_fp_nodes_[i] =
+        new (allocator_) InterferenceNode(allocator_, interval, liveness);
+    physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
+    fp_intervals_.push_back(interval);
+    if (codegen_->IsBlockedFloatingPointRegister(i)) {
+      ++number_of_globally_blocked_fp_regs_;
+      interval->AddRange(0, liveness.GetMaxLifetimePosition());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateRegisters() {
+  // (1) Collect and prepare live intervals.
+  ProcessInstructions();
+
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*>& intervals = processing_core_regs
+        ? core_intervals_
+        : fp_intervals_;
+    size_t num_registers = processing_core_regs
+        ? codegen_->GetNumberOfCoreRegisters()
+        : codegen_->GetNumberOfFloatingPointRegisters();
+
+    size_t attempt = 0;
+    while (true) {
+      ++attempt;
+      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
+          << "Exceeded debug max graph coloring register allocation attempts. "
+          << "This could indicate that the register allocator is not making forward progress, "
+          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
+          << "should be prioritized over long ones, because they cannot be split further.)";
+
+      // Many data structures are cleared between graph coloring attempts, so we reduce
+      // total memory usage by using a new arena allocator for each attempt.
+      ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
+      ColoringIteration iteration(this,
+                                  &coloring_attempt_allocator,
+                                  processing_core_regs,
+                                  num_registers);
+
+      // (2) Build the interference graph. Also gather safepoints.
+      ArenaVector<InterferenceNode*> safepoints(
+          coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
+      ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+          ? physical_core_nodes_
+          : physical_fp_nodes_;
+      iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints);
+
+      // (3) Add coalesce opportunities.
+      //     If we have tried coloring the graph a suspiciously high number of times, give
+      //     up on move coalescing, just in case the coalescing heuristics are not conservative.
+      //     (This situation will be caught if DCHECKs are turned on.)
+      if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
+        iteration.FindCoalesceOpportunities();
+      }
+
+      // (4) Prune all uncolored nodes from interference graph.
+      iteration.PruneInterferenceGraph();
+
+      // (5) Color pruned nodes based on interferences.
+      bool successful = iteration.ColorInterferenceGraph();
+
+      // We manually clear coalesce opportunities for physical nodes,
+      // since they persist across coloring attempts.
+      for (InterferenceNode* node : physical_core_nodes_) {
+        node->ClearCoalesceOpportunities();
+      }
+      for (InterferenceNode* node : physical_fp_nodes_) {
+        node->ClearCoalesceOpportunities();
+      }
+
+      if (successful) {
+        // Assign spill slots.
+        AllocateSpillSlots(iteration.GetPrunableNodes());
+
+        // Compute the maximum number of live registers across safepoints.
+        // Notice that we do not count globally blocked registers, such as the stack pointer.
+        if (safepoints.size() > 0) {
+          size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints);
+          if (processing_core_regs) {
+            max_safepoint_live_core_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_core_regs_;
+          } else {
+            max_safepoint_live_fp_regs_ =
+                max_safepoint_live_regs - number_of_globally_blocked_fp_regs_;
+          }
+        }
+
+        // Tell the code generator which registers were allocated.
+        // We only look at prunable_nodes because we already told the code generator about
+        // fixed intervals while processing instructions. We also ignore the fixed intervals
+        // placed at the top of catch blocks.
+        for (InterferenceNode* node : iteration.GetPrunableNodes()) {
+          LiveInterval* interval = node->GetInterval();
+          if (interval->HasRegister()) {
+            Location low_reg = processing_core_regs
+                ? Location::RegisterLocation(interval->GetRegister())
+                : Location::FpuRegisterLocation(interval->GetRegister());
+            codegen_->AddAllocatedRegister(low_reg);
+            if (interval->HasHighInterval()) {
+              LiveInterval* high = interval->GetHighInterval();
+              DCHECK(high->HasRegister());
+              Location high_reg = processing_core_regs
+                  ? Location::RegisterLocation(high->GetRegister())
+                  : Location::FpuRegisterLocation(high->GetRegister());
+              codegen_->AddAllocatedRegister(high_reg);
+            }
+          } else {
+            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+          }
+        }
+
+        break;
+      }
+    }  // while unsuccessful
+  }  // for processing_core_regs
+
+  // (6) Resolve locations and deconstruct SSA form.
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(max_safepoint_live_core_regs_,
+               max_safepoint_live_fp_regs_,
+               reserved_art_method_slots_ + reserved_out_slots_,
+               num_int_spill_slots_,
+               num_long_spill_slots_,
+               num_float_spill_slots_,
+               num_double_spill_slots_,
+               catch_phi_spill_slot_counter_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    Validate(/*log_fatal_on_failure*/ true);
+  }
+}
+
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+  for (bool processing_core_regs : {true, false}) {
+    ArenaVector<LiveInterval*> intervals(
+        allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+      LiveInterval* interval = instruction->GetLiveInterval();
+      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+        intervals.push_back(instruction->GetLiveInterval());
+      }
+    }
+
+    ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+        ? physical_core_nodes_
+        : physical_fp_nodes_;
+    for (InterferenceNode* fixed : physical_nodes) {
+      LiveInterval* interval = fixed->GetInterval();
+      if (interval->GetFirstRange() != nullptr) {
+        // Ideally we would check fixed ranges as well, but currently there are times when
+        // two fixed intervals for the same register will overlap. For example, a fixed input
+        // and a fixed output may sometimes share the same register, in which case there will be two
+        // fixed intervals for the same place.
+      }
+    }
+
+    for (LiveInterval* temp : temp_intervals_) {
+      if (IsCoreInterval(temp) == processing_core_regs) {
+        intervals.push_back(temp);
+      }
+    }
+
+    size_t spill_slots = num_int_spill_slots_
+                       + num_long_spill_slots_
+                       + num_float_spill_slots_
+                       + num_double_spill_slots_
+                       + catch_phi_spill_slot_counter_;
+    bool ok = ValidateIntervals(intervals,
+                                spill_slots,
+                                reserved_art_method_slots_ + reserved_out_slots_,
+                                *codegen_,
+                                allocator_,
+                                processing_core_regs,
+                                log_fatal_on_failure);
+    if (!ok) {
+      return false;
+    }
+  }  // for processing_core_regs
+
+  return true;
+}
+
+void RegisterAllocatorGraphColor::ProcessInstructions() {
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    // Note that we currently depend on this ordering, since some helper
+    // code is designed for linear scan register allocation.
+    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
+          !instr_it.Done();
+          instr_it.Advance()) {
+      ProcessInstruction(instr_it.Current());
+    }
+
+    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+      ProcessInstruction(phi_it.Current());
+    }
+
+    if (block->IsCatchBlock()
+        || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in register.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
+    // We do this here because we do not want the suspend check to artificially
+    // create live registers.
+    DCHECK(instruction->IsSuspendCheckEntry());
+    DCHECK_EQ(locations->GetTempCount(), 0u);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
+  }
+
+  CheckForTempLiveIntervals(instruction);
+  CheckForSafepoint(instruction);
+  if (instruction->GetLocations()->WillCall()) {
+    // If a call will happen, create fixed intervals for caller-save registers.
+    // TODO: Note that it may be beneficial to later split intervals at this point,
+    //       so that we allow last-minute moves from a caller-save register
+    //       to a callee-save register.
+    BlockRegisters(instruction->GetLifetimePosition(),
+                   instruction->GetLifetimePosition() + 1,
+                   /*caller_save_only*/ true);
+  }
+  CheckForFixedInputs(instruction);
+
+  LiveInterval* interval = instruction->GetLiveInterval();
+  if (interval == nullptr) {
+    // Instructions lacking a valid output location do not have a live interval.
+    DCHECK(!locations->Out().IsValid());
+    return;
+  }
+
+  // Low intervals act as representatives for their corresponding high interval.
+  DCHECK(!interval->IsHighInterval());
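+  // For example, long and double values need a register pair (low and high) on 32-bit targets.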
+  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
+    interval->AddHighInterval();
+  }
+  AddSafepointsFor(instruction);
+  CheckForFixedOutput(instruction);
+  AllocateSpillSlotForCatchPhi(instruction);
+
+  ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
+      ? core_intervals_
+      : fp_intervals_;
+  if (interval->HasSpillSlot() || instruction->IsConstant()) {
+    // Note that if an interval already has a spill slot, then its value currently resides
+    // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
+    // register use. This is also true for constants, which can be materialized at any point.
+    size_t first_register_use = interval->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
+      intervals.push_back(split);
+    } else {
+      // We won't allocate a register for this value.
+    }
+  } else {
+    intervals.push_back(interval);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
+  // We simply block physical registers where necessary.
+  // TODO: Ideally we would coalesce the physical register with the register
+  //       allocated to the input value, but this can be tricky if, e.g., there
+  //       could be multiple physical register uses of the same value at the
+  //       same instruction. Furthermore, there's currently no distinction between
+  //       fixed inputs to a call (which will be clobbered) and other fixed inputs (which
+  //       may not be clobbered).
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+      codegen_->AddAllocatedRegister(input);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+      codegen_->AddAllocatedRegister(input.ToLow());
+      codegen_->AddAllocatedRegister(input.ToHigh());
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
+  // If an instruction has a fixed output location, we give the live interval a register and then
+  // proactively split it just after the definition point to avoid creating too many interferences
+  // with a fixed node.
+  LiveInterval* interval = instruction->GetLiveInterval();
+  Location out = interval->GetDefinedBy()->GetLocations()->Out();
+  size_t position = instruction->GetLifetimePosition();
+  DCHECK_GE(interval->GetEnd() - position, 2u);
+
+  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+    out = instruction->GetLocations()->InAt(0);
+  }
+
+  if (out.IsRegister() || out.IsFpuRegister()) {
+    interval->SetRegister(out.reg());
+    codegen_->AddAllocatedRegister(out);
+    Split(interval, position + 1);
+  } else if (out.IsPair()) {
+    interval->SetRegister(out.low());
+    interval->GetHighInterval()->SetRegister(out.high());
+    codegen_->AddAllocatedRegister(out.ToLow());
+    codegen_->AddAllocatedRegister(out.ToHigh());
+    Split(interval, position + 1);
+  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
+    interval->SetSpillSlot(out.GetStackIndex());
+  } else {
+    DCHECK(out.IsUnallocated() || out.IsConstant());
+  }
+}
+
+void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
+  LiveInterval* interval = instruction->GetLiveInterval();
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Test that safepoints_ are ordered in the optimal way.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == interval->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (interval->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!interval->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    interval->AddSafepoint(safepoint);
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          interval->AddTempUse(instruction, i);
+          core_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          interval->AddTempUse(instruction, i);
+          fp_intervals_.push_back(interval);
+          temp_intervals_.push_back(interval);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/*is_temp*/ true);
+            temp_intervals_.push_back(interval->GetHighInterval());
+          }
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations->NeedsSafepoint()) {
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires knowing the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      core_intervals_.push_back(interval);
+      fp_intervals_.push_back(interval);
+    }
+  }
+}
+
+LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
+  if (interval->GetStart() < position && position < interval->GetEnd()) {
+    return Split(interval, position);
+  } else {
+    return interval;
+  }
+}
+
+void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
+  DCHECK(!interval->IsHighInterval());
+
+  // Split just after a register definition.
+  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
+    interval = TrySplit(interval, interval->GetStart() + 1);
+  }
+
+  UsePosition* use = interval->GetFirstUse();
+  while (use != nullptr && use->GetPosition() < interval->GetStart()) {
+    use = use->GetNext();
+  }
+
+  // Split around register uses.
+  size_t end = interval->GetEnd();
+  while (use != nullptr && use->GetPosition() <= end) {
+    if (use->RequiresRegister()) {
+      size_t position = use->GetPosition();
+      interval = TrySplit(interval, position - 1);
+      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
+        // If we are at the very end of a basic block, we cannot split right
+        // at the use. Split just after instead.
+        interval = TrySplit(interval, position + 1);
+      } else {
+        interval = TrySplit(interval, position);
+      }
+    }
+    use = use->GetNext();
+  }
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    HPhi* phi = instruction->AsPhi();
+    LiveInterval* interval = phi->GetLiveInterval();
+
+    HInstruction* previous_phi = phi->GetPrevious();
+    DCHECK(previous_phi == nullptr ||
+           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+        << "Phis expected to be sorted by vreg number, "
+        << "so that equivalent phis are adjacent.";
+
+    if (phi->IsVRegEquivalentOf(previous_phi)) {
+      // Assign the same spill slot.
+      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+    } else {
+      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
+      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    }
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegister(Location location,
+                                                size_t start,
+                                                size_t end) {
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  int reg = location.reg();
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_nodes_[reg]->GetInterval()
+      : physical_fp_nodes_[reg]->GetInterval();
+  DCHECK(interval->GetRegister() == reg);
+  bool blocked_by_codegen = location.IsRegister()
+      ? codegen_->IsBlockedCoreRegister(reg)
+      : codegen_->IsBlockedFloatingPointRegister(reg);
+  if (blocked_by_codegen) {
+    // We've already blocked this register for the entire method. (And adding a
+    // range inside another range violates the preconditions of AddRange).
+  } else {
+    interval->AddRange(start, end);
+  }
+}
+
+void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
+                                                 InterferenceNode* to,
+                                                 bool guaranteed_not_interfering_yet,
+                                                 bool both_directions) {
+  if (from->IsPrecolored()) {
+    // We save space by ignoring outgoing edges from fixed nodes.
+  } else if (to->GetInterval()->IsSlowPathSafepoint()) {
+    // Safepoint intervals are only there to count max live registers,
+    // so no need to give them incoming interference edges.
+    // This is also necessary for correctness, because we don't want nodes
+    // to remove themselves from safepoint adjacency sets when they're pruned.
+  } else if (to->IsPrecolored()) {
+    // It is important that only a single node represents a given fixed register in the
+    // interference graph. We retrieve that node here.
+    const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
+        ? register_allocator_->physical_fp_nodes_
+        : register_allocator_->physical_core_nodes_;
+    InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
+    from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
+    DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
+    DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
+
+    // If a node interferes with a fixed pair node, the weight of the edge may
+    // be inaccurate after using the alias of the pair node, because the alias of the pair node
+    // is a singular node.
+    // We could make special pair fixed nodes, but that ends up being too conservative because
+    // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
+    // three rather than two.
+    // Instead, we explicitly add an interference with the high node of the fixed pair node.
+    // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
+    //       can be unaligned on x86 complicates things.
+    if (to->IsPair()) {
+      InterferenceNode* high_node =
+          physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
+      DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
+                high_node->GetInterval()->GetRegister());
+      from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
+    }
+  } else {
+    // Standard interference between two uncolored nodes.
+    from->AddInterference(to, guaranteed_not_interfering_yet);
+  }
+
+  if (both_directions) {
+    AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
+  }
+}
+
+// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
+// is allowed to have the same register as the input interval.
+// TODO: Ideally we should just produce correct intervals in liveness analysis.
+//       We would need to refactor the current live interval layout to do so, which is
+//       no small task.
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
+  LiveInterval* output_interval = out_node->GetInterval();
+  HInstruction* defined_by = output_interval->GetDefinedBy();
+  if (defined_by == nullptr) {
+    // This must not be a definition point.
+    return false;
+  }
+
+  LocationSummary* locations = defined_by->GetLocations();
+  if (locations->OutputCanOverlapWithInputs()) {
+    // This instruction does not allow the output to reuse a register from an input.
+    return false;
+  }
+
+  LiveInterval* input_interval = in_node->GetInterval();
+  LiveInterval* next_sibling = input_interval->GetNextSibling();
+  size_t def_position = defined_by->GetLifetimePosition();
+  size_t use_position = def_position + 1;
+  if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
+    // The next sibling starts at the use position, so reusing the input register in the output
+    // would clobber the input before it's moved into the sibling interval location.
+    return false;
+  }
+
+  if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
+    // The input interval is live after the use position.
+    return false;
+  }
+
+  HInputsRef inputs = defined_by->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
+      DCHECK(input_interval->SameRegisterKind(*output_interval));
+      return true;
+    }
+  }
+
+  // The input interval was not an input for this instruction.
+  return false;
+}
+
+void ColoringIteration::BuildInterferenceGraph(
+    const ArenaVector<LiveInterval*>& intervals,
+    const ArenaVector<InterferenceNode*>& physical_nodes,
+    ArenaVector<InterferenceNode*>* safepoints) {
+  DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+  // Build the interference graph efficiently by ordering range endpoints
+  // by position and doing a linear sweep to find interferences. (That is, we
+  // jump from endpoint to endpoint, maintaining a set of intervals live at each
+  // point. If two nodes are ever in the live set at the same time, then they
+  // interfere with each other.)
+  //
+  // We order by both position and (secondarily) by whether the endpoint
+  // begins or ends a range; we want to process range endings before range
+  // beginnings at the same position because they should not conflict.
+  //
+  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
+  // Tuple contents: (position, is_range_beginning, node).
+  ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // We reserve plenty of space to avoid excessive copying.
+  range_endpoints.reserve(4 * prunable_nodes_.size());
+
+  for (LiveInterval* parent : intervals) {
+    for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
+      LiveRange* range = sibling->GetFirstRange();
+      if (range != nullptr) {
+        InterferenceNode* node = new (allocator_) InterferenceNode(
+            allocator_, sibling, register_allocator_->liveness_);
+        interval_node_map_.Insert(std::make_pair(sibling, node));
+
+        if (sibling->HasRegister()) {
+          // Fixed nodes should alias the canonical node for the corresponding register.
+          node->stage = NodeStage::kPrecolored;
+          InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
+          node->SetAlias(physical_node);
+          DCHECK_EQ(node->GetInterval()->GetRegister(),
+                    physical_node->GetInterval()->GetRegister());
+        } else if (sibling->IsSlowPathSafepoint()) {
+          // Safepoint intervals are synthesized to count max live registers.
+          // They will be processed separately after coloring.
+          node->stage = NodeStage::kSafepoint;
+          safepoints->push_back(node);
+        } else {
+          node->stage = NodeStage::kPrunable;
+          prunable_nodes_.push_back(node);
+        }
+
+        while (range != nullptr) {
+          range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
+          range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
+          range = range->GetNext();
+        }
+      }
+    }
+  }
+
+  // Sort the endpoints.
+  // We explicitly ignore the third entry of each tuple (the node pointer) in order
+  // to maintain determinism.
+  std::sort(range_endpoints.begin(), range_endpoints.end(),
+            [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
+                const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
+    return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+         < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+  });
+
+  // Nodes live at the current position in the linear sweep.
+  ArenaVector<InterferenceNode*> live(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
+  // live set. When we encounter the end of a range, we remove the corresponding node
+  // from the live set. Nodes interfere if they are in the live set at the same time.
+  for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
+    bool is_range_beginning;
+    InterferenceNode* node;
+    size_t position;
+    // Extract information from the tuple, including the node this tuple represents.
+    std::tie(position, is_range_beginning, node) = *it;
+
+    if (is_range_beginning) {
+      bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
+      for (InterferenceNode* conflicting : live) {
+        DCHECK_NE(node, conflicting);
+        if (CheckInputOutputCanOverlap(conflicting, node)) {
+          // We do not add an interference, because the instruction represented by `node` allows
+          // its output to share a register with an input, represented here by `conflicting`.
+        } else {
+          AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
+        }
+      }
+      DCHECK(std::find(live.begin(), live.end(), node) == live.end());
+      live.push_back(node);
+    } else {
+      // End of range.
+      auto live_it = std::find(live.begin(), live.end(), node);
+      DCHECK(live_it != live.end());
+      live.erase(live_it);
+    }
+  }
+  DCHECK(live.empty());
+}
+
+void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
+                                                  InterferenceNode* b,
+                                                  CoalesceKind kind,
+                                                  size_t position) {
+  DCHECK_EQ(a->IsPair(), b->IsPair())
+      << "Nodes of different memory widths should never be coalesced";
+  CoalesceOpportunity* opportunity =
+      new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
+  a->AddCoalesceOpportunity(opportunity);
+  b->AddCoalesceOpportunity(opportunity);
+  coalesce_worklist_.push(opportunity);
+}
+
+// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
+// corresponding to an interval. Note that not all intervals are in this map, notably the parents
+// of constants and stack arguments. (However, these intervals should not be involved in coalesce
+// opportunities anyway, because they're not going to be in registers.)
+void ColoringIteration::FindCoalesceOpportunities() {
+  DCHECK(coalesce_worklist_.empty());
+
+  for (InterferenceNode* node : prunable_nodes_) {
+    LiveInterval* interval = node->GetInterval();
+
+    // Coalesce siblings.
+    LiveInterval* next_sibling = interval->GetNextSibling();
+    if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
+      auto it = interval_node_map_.Find(next_sibling);
+      if (it != interval_node_map_.end()) {
+        InterferenceNode* sibling_node = it->second;
+        CreateCoalesceOpportunity(node,
+                                  sibling_node,
+                                  CoalesceKind::kAdjacentSibling,
+                                  interval->GetEnd());
+      }
+    }
+
+    // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
+    LiveInterval* parent = interval->GetParent();
+    if (parent->HasRegister()
+        && parent->GetNextSibling() == interval
+        && parent->GetEnd() == interval->GetStart()) {
+      auto it = interval_node_map_.Find(parent);
+      if (it != interval_node_map_.end()) {
+        InterferenceNode* parent_node = it->second;
+        CreateCoalesceOpportunity(node,
+                                  parent_node,
+                                  CoalesceKind::kFixedOutputSibling,
+                                  parent->GetEnd());
+      }
+    }
+
+    // Try to prevent moves across blocks.
+    // Note that this does not lead to many successful coalesce attempts, so it could be removed
+    // if found to add to compile time.
+    const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
+    if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
+      // If the start of this interval is at a block boundary, we look at the
+      // location of the interval in blocks preceding the block this interval
+      // starts at. This can avoid a move between the two blocks.
+      HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        size_t position = predecessor->GetLifetimeEnd() - 1;
+        LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
+        if (existing != nullptr) {
+          auto it = interval_node_map_.Find(existing);
+          if (it != interval_node_map_.end()) {
+            InterferenceNode* existing_node = it->second;
+            CreateCoalesceOpportunity(node,
+                                      existing_node,
+                                      CoalesceKind::kNonlinearControlFlow,
+                                      position);
+          }
+        }
+      }
+    }
+
+    // Coalesce phi inputs with the corresponding output.
+    HInstruction* defined_by = interval->GetDefinedBy();
+    if (defined_by != nullptr && defined_by->IsPhi()) {
+      const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
+      HInputsRef inputs = defined_by->GetInputs();
+
+      for (size_t i = 0, e = inputs.size(); i < e; ++i) {
+        // We want the sibling at the end of the appropriate predecessor block.
+        size_t position = predecessors[i]->GetLifetimeEnd() - 1;
+        LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
+
+        auto it = interval_node_map_.Find(input_interval);
+        if (it != interval_node_map_.end()) {
+          InterferenceNode* input_node = it->second;
+          CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
+        }
+      }
+    }
+
+    // Coalesce output with first input when policy is kSameAsFirstInput.
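+    // (On x86, for example, two-operand instructions such as HAdd typically use this policy,
+    // since the instruction overwrites its first operand.)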
+    if (defined_by != nullptr) {
+      Location out = defined_by->GetLocations()->Out();
+      if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+        LiveInterval* input_interval
+            = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
+        // TODO: Could we consider lifetime holes here?
+        if (input_interval->GetEnd() == interval->GetStart()) {
+          auto it = interval_node_map_.Find(input_interval);
+          if (it != interval_node_map_.end()) {
+            InterferenceNode* input_node = it->second;
+            CreateCoalesceOpportunity(node,
+                                      input_node,
+                                      CoalesceKind::kFirstInput,
+                                      interval->GetStart());
+          }
+        }
+      }
+    }
+
+    // An interval that starts at an instruction (that is, one that has not been split) may
+    // reuse the registers used by that instruction's inputs, depending on the instruction's
+    // location summary.
+    if (defined_by != nullptr) {
+      DCHECK(!interval->IsSplit());
+      LocationSummary* locations = defined_by->GetLocations();
+      if (!locations->OutputCanOverlapWithInputs()) {
+        HInputsRef inputs = defined_by->GetInputs();
+        for (size_t i = 0; i < inputs.size(); ++i) {
+          size_t def_point = defined_by->GetLifetimePosition();
+          // TODO: Getting the sibling at the def_point might not be quite what we want
+          //       for fixed inputs, since the use will be *at* the def_point rather than after.
+          LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
+          if (input_interval != nullptr &&
+              input_interval->HasHighInterval() == interval->HasHighInterval()) {
+            auto it = interval_node_map_.Find(input_interval);
+            if (it != interval_node_map_.end()) {
+              InterferenceNode* input_node = it->second;
+              CreateCoalesceOpportunity(node,
+                                        input_node,
+                                        CoalesceKind::kAnyInput,
+                                        interval->GetStart());
+            }
+          }
+        }
+      }
+    }
+
+    // Try to prevent moves into fixed input locations.
+    UsePosition* use = interval->GetFirstUse();
+    for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
+      // Skip past uses before the start of this interval.
+    }
+    for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
+      HInstruction* user = use->GetUser();
+      if (user == nullptr) {
+        // User may be null for certain intervals, such as temp intervals.
+        continue;
+      }
+      LocationSummary* locations = user->GetLocations();
+      Location input = locations->InAt(use->GetInputIndex());
+      if (input.IsRegister() || input.IsFpuRegister()) {
+        // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
+        //       is currently not supported.
+        InterferenceNode* fixed_node = input.IsRegister()
+            ? register_allocator_->physical_core_nodes_[input.reg()]
+            : register_allocator_->physical_fp_nodes_[input.reg()];
+        CreateCoalesceOpportunity(node,
+                                  fixed_node,
+                                  CoalesceKind::kFixedInput,
+                                  user->GetLifetimePosition());
+      }
+    }
+  }  // for node in prunable_nodes
+}
+
+static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
+  return node->GetOutDegree() < num_regs;
+}
+
+static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
+  return !IsLowDegreeNode(node, num_regs);
+}
+
+void ColoringIteration::PruneInterferenceGraph() {
+  DCHECK(pruned_nodes_.empty()
+      && simplify_worklist_.empty()
+      && freeze_worklist_.empty()
+      && spill_worklist_.empty());
+  // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
+  // and all others as high degree nodes. The distinction is important: low degree nodes are
+  // guaranteed a color, while high degree nodes are not.
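+  // For example, with num_regs_ == 3, a node with only two neighbors always has a free
+  // register left over, no matter how those neighbors are colored.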
+
+  // Build worklists. Note that the coalesce worklist has already been
+  // filled by FindCoalesceOpportunities().
+  for (InterferenceNode* node : prunable_nodes_) {
+    DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
+    DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned";
+    if (IsLowDegreeNode(node, num_regs_)) {
+      if (node->GetCoalesceOpportunities().empty()) {
+        // Simplify Worklist.
+        node->stage = NodeStage::kSimplifyWorklist;
+        simplify_worklist_.push_back(node);
+      } else {
+        // Freeze Worklist.
+        node->stage = NodeStage::kFreezeWorklist;
+        freeze_worklist_.push_back(node);
+      }
+    } else {
+      // Spill worklist.
+      node->stage = NodeStage::kSpillWorklist;
+      spill_worklist_.push(node);
+    }
+  }
+
+  // Prune graph.
+  // Note that we do not remove a node from its current worklist if it moves to another, so it may
+  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
+  while (true) {
+    if (!simplify_worklist_.empty()) {
+      // Prune low-degree nodes.
+      // TODO: pop_back() should work as well, but it didn't; we get a
+      //       failed check while pruning. We should look into this.
+      InterferenceNode* node = simplify_worklist_.front();
+      simplify_worklist_.pop_front();
+      DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
+      DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
+      DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
+      PruneNode(node);
+    } else if (!coalesce_worklist_.empty()) {
+      // Coalesce.
+      CoalesceOpportunity* opportunity = coalesce_worklist_.top();
+      coalesce_worklist_.pop();
+      if (opportunity->stage == CoalesceStage::kWorklist) {
+        Coalesce(opportunity);
+      }
+    } else if (!freeze_worklist_.empty()) {
+      // Freeze moves and prune a low-degree move-related node.
+      InterferenceNode* node = freeze_worklist_.front();
+      freeze_worklist_.pop_front();
+      if (node->stage == NodeStage::kFreezeWorklist) {
+        DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
+        DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
+        FreezeMoves(node);
+        PruneNode(node);
+      }
+    } else if (!spill_worklist_.empty()) {
+      // We spill the lowest-priority node, because pruning a node earlier
+      // gives it a higher chance of being spilled.
+      InterferenceNode* node = spill_worklist_.top();
+      spill_worklist_.pop();
+      if (node->stage == NodeStage::kSpillWorklist) {
+        DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
+        FreezeMoves(node);
+        PruneNode(node);
+      }
+    } else {
+      // Pruning complete.
+      break;
+    }
+  }
+  DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
+}
+
+void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
+  for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+    if (opportunity->stage == CoalesceStage::kActive) {
+      opportunity->stage = CoalesceStage::kWorklist;
+      coalesce_worklist_.push(opportunity);
+    }
+  }
+}
+
+void ColoringIteration::PruneNode(InterferenceNode* node) {
+  DCHECK_NE(node->stage, NodeStage::kPruned);
+  DCHECK(!node->IsPrecolored());
+  node->stage = NodeStage::kPruned;
+  pruned_nodes_.push(node);
+
+  for (InterferenceNode* adj : node->GetAdjacentNodes()) {
+    DCHECK(!adj->GetInterval()->IsSlowPathSafepoint())
+        << "Nodes should never interfere with synthesized safepoint nodes";
+    DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
+
+    if (adj->IsPrecolored()) {
+      // No effect on pre-colored nodes; they're never pruned.
+    } else {
+      // Remove the interference.
+      bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
+      DCHECK(adj->ContainsInterference(node))
+          << "Missing reflexive interference from non-fixed node";
+      adj->RemoveInterference(node);
+
+      // Handle transitions from high degree to low degree.
+      if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
+        EnableCoalesceOpportunities(adj);
+        for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
+          EnableCoalesceOpportunities(adj_adj);
+        }
+
+        DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
+        if (adj->IsMoveRelated()) {
+          adj->stage = NodeStage::kFreezeWorklist;
+          freeze_worklist_.push_back(adj);
+        } else {
+          adj->stage = NodeStage::kSimplifyWorklist;
+          simplify_worklist_.push_back(adj);
+        }
+      }
+    }
+  }
+}
+
+void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
+  if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
+    DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
+    node->stage = NodeStage::kSimplifyWorklist;
+    simplify_worklist_.push_back(node);
+  }
+}
+
+void ColoringIteration::FreezeMoves(InterferenceNode* node) {
+  for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+    if (opportunity->stage == CoalesceStage::kDefunct) {
+      // Constrained moves should remain constrained, since they will not be considered
+      // during last-chance coalescing.
+    } else {
+      opportunity->stage = CoalesceStage::kInactive;
+    }
+    InterferenceNode* other = opportunity->node_a->GetAlias() == node
+        ? opportunity->node_b->GetAlias()
+        : opportunity->node_a->GetAlias();
+    if (other != node && other->stage == NodeStage::kFreezeWorklist) {
+      DCHECK(IsLowDegreeNode(node, num_regs_));
+      CheckTransitionFromFreezeWorklist(other);
+    }
+  }
+}
+
+bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
+                                            InterferenceNode* into) {
+  if (!into->IsPrecolored()) {
+    // The uncolored heuristic will cover this case.
+    return false;
+  }
+  if (from->IsPair() || into->IsPair()) {
+    // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
+    //       are currently represented as two single fixed nodes in the graph, and `into` is
+    //       only one of them. (We may lose the implicit connections to the second one in a merge.)
+    return false;
+  }
+
+  // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
+  // Reasons an adjacent node `adj` can be "ok":
+  // (1) If `adj` is low degree, interference with `into` will not affect its existing
+  //     colorable guarantee. (Notice that coalescing cannot increase its degree.)
+  // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
+  // (3) If there's already an interference with `into`, coalescing will not add interferences.
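+  // (This amounts to the conservative coalescing test usually attributed to George.)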
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
+      // Ok.
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
+                                           InterferenceNode* into) {
+  if (into->IsPrecolored()) {
+    // The pre-colored heuristic will handle this case.
+    return false;
+  }
+
+  // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
+  // on generated code.
+  if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
+    return false;
+  }
+
+  // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
+  // of high degree. (Low degree neighbors can be ignored, because they will eventually be
+  // pruned from the interference graph in the simplify stage.)
+  size_t high_degree_interferences = 0;
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    if (IsHighDegreeNode(adj, num_regs_)) {
+      high_degree_interferences += from->EdgeWeightWith(adj);
+    }
+  }
+  for (InterferenceNode* adj : into->GetAdjacentNodes()) {
+    if (IsHighDegreeNode(adj, num_regs_)) {
+      if (from->ContainsInterference(adj)) {
+        // We've already counted this adjacent node.
+        // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
+        // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
+        // but remains conservative.)
+        if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
+          high_degree_interferences -= from->EdgeWeightWith(adj);
+        }
+      } else {
+        high_degree_interferences += into->EdgeWeightWith(adj);
+      }
+    }
+  }
+
+  return high_degree_interferences < num_regs_;
+}
+
+void ColoringIteration::Combine(InterferenceNode* from,
+                                InterferenceNode* into) {
+  from->SetAlias(into);
+
+  // Add interferences.
+  for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+    bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
+    AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
+    if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
+      // This is a (temporary) transition to a high degree node. Its degree will decrease again
+      // when we prune `from`, but it's best to be consistent about the current worklist.
+      adj->stage = NodeStage::kSpillWorklist;
+      spill_worklist_.push(adj);
+    }
+  }
+
+  // Add coalesce opportunities.
+  for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
+    if (opportunity->stage != CoalesceStage::kDefunct) {
+      into->AddCoalesceOpportunity(opportunity);
+    }
+  }
+  EnableCoalesceOpportunities(from);
+
+  // Prune and update worklists.
+  PruneNode(from);
+  if (IsLowDegreeNode(into, num_regs_)) {
+    // Coalesce(...) takes care of checking for a transition to the simplify worklist.
+    DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
+  } else if (into->stage == NodeStage::kFreezeWorklist) {
+    // This is a transition to a high degree node.
+    into->stage = NodeStage::kSpillWorklist;
+    spill_worklist_.push(into);
+  } else {
+    DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+  }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+  InterferenceNode* from = opportunity->node_a->GetAlias();
+  InterferenceNode* into = opportunity->node_b->GetAlias();
+  DCHECK_NE(from->stage, NodeStage::kPruned);
+  DCHECK_NE(into->stage, NodeStage::kPruned);
+
+  if (from->IsPrecolored()) {
+    // If we have one pre-colored node, make sure it's the `into` node.
+    std::swap(from, into);
+  }
+
+  if (from == into) {
+    // These nodes have already been coalesced.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+  } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+    // These nodes interfere.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+    CheckTransitionFromFreezeWorklist(into);
+  } else if (PrecoloredHeuristic(from, into)
+          || UncoloredHeuristic(from, into)) {
+    // We can coalesce these nodes.
+    opportunity->stage = CoalesceStage::kDefunct;
+    Combine(from, into);
+    CheckTransitionFromFreezeWorklist(into);
+  } else {
+    // We cannot coalesce, but we may be able to later.
+    opportunity->stage = CoalesceStage::kActive;
+  }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
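+// For example, adjacent nodes holding registers 1 and 3 produce the mask 0b1010.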
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+  std::bitset<kMaxNumRegs> conflict_mask;
+  for (InterferenceNode* adjacent : intervals) {
+    LiveInterval* conflicting = adjacent->GetInterval();
+    if (conflicting->HasRegister()) {
+      conflict_mask.set(conflicting->GetRegister());
+      if (conflicting->HasHighInterval()) {
+        DCHECK(conflicting->GetHighInterval()->HasRegister());
+        conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+      }
+    } else {
+      DCHECK(!conflicting->HasHighInterval()
+          || !conflicting->GetHighInterval()->HasRegister());
+    }
+  }
+  return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+  return processing_core_regs
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+  return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+  // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+  // Note that CTZ is undefined if all bits are 0, so we special-case it.
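+  // For example, with conflict_mask == 0b0111, ~conflict_mask has its lowest set bit at
+  // position 3, so CTZ returns 3: register 3 is the first free register.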
+  return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
+
+bool ColoringIteration::ColorInterferenceGraph() {
+  DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
+  ArenaVector<LiveInterval*> colored_intervals(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+  bool successful = true;
+
+  while (!pruned_nodes_.empty()) {
+    InterferenceNode* node = pruned_nodes_.top();
+    pruned_nodes_.pop();
+    LiveInterval* interval = node->GetInterval();
+    size_t reg = 0;
+
+    InterferenceNode* alias = node->GetAlias();
+    if (alias != node) {
+      // This node was coalesced with another.
+      LiveInterval* alias_interval = alias->GetInterval();
+      if (alias_interval->HasRegister()) {
+        reg = alias_interval->GetRegister();
+        DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
+            << "This node conflicts with the register it was coalesced with";
+      } else {
+        DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
+            << "Move coalescing was not conservative, causing a node to be coalesced "
+            << "with another node that could not be colored";
+        if (interval->RequiresRegister()) {
+          successful = false;
+        }
+      }
+    } else {
+      // Search for free register(s).
+      std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
+      if (interval->HasHighInterval()) {
+        // Note that the graph coloring allocator assumes that pair intervals are aligned here,
+        // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
+        // change the alignment requirements here, we will have to update the algorithm (e.g.,
+        // be more conservative about the weight of edges adjacent to pair nodes.)
+        while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
+          reg += 2;
+        }
+
+        // Try to use a caller-save register first.
+        for (size_t i = 0; i < num_regs_ - 1; i += 2) {
+          bool low_caller_save  = register_allocator_->IsCallerSave(i, processing_core_regs_);
+          bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
+          if (!conflict_mask[i] && !conflict_mask[i + 1]) {
+            if (low_caller_save && high_caller_save) {
+              reg = i;
+              break;
+            } else if (low_caller_save || high_caller_save) {
+              reg = i;
+              // Keep looking to try to get both parts in caller-save registers.
+            }
+          }
+        }
+      } else {
+        // Not a pair interval.
+        reg = FindFirstZeroInConflictMask(conflict_mask);
+
+        // Try to use caller-save registers first.
+        for (size_t i = 0; i < num_regs_; ++i) {
+          if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
+            reg = i;
+            break;
+          }
+        }
+      }
+
+      // Last-chance coalescing.
+      for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+        if (opportunity->stage == CoalesceStage::kDefunct) {
+          continue;
+        }
+        LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
+            ? opportunity->node_b->GetAlias()->GetInterval()
+            : opportunity->node_a->GetAlias()->GetInterval();
+        if (other_interval->HasRegister()) {
+          size_t coalesce_register = other_interval->GetRegister();
+          if (interval->HasHighInterval()) {
+            if (!conflict_mask[coalesce_register] &&
+                !conflict_mask[coalesce_register + 1] &&
+                RegisterIsAligned(coalesce_register)) {
+              reg = coalesce_register;
+              break;
+            }
+          } else if (!conflict_mask[coalesce_register]) {
+            reg = coalesce_register;
+            break;
+          }
+        }
+      }
+    }
+
+    if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
+      // Assign register.
+      DCHECK(!interval->HasRegister());
+      interval->SetRegister(reg);
+      colored_intervals.push_back(interval);
+      if (interval->HasHighInterval()) {
+        DCHECK(!interval->GetHighInterval()->HasRegister());
+        interval->GetHighInterval()->SetRegister(reg + 1);
+        colored_intervals.push_back(interval->GetHighInterval());
+      }
+    } else if (interval->RequiresRegister()) {
+      // The interference graph is too dense to color. Make it sparser by
+      // splitting this live interval.
+      successful = false;
+      register_allocator_->SplitAtRegisterUses(interval);
+      // We continue coloring, because there may be additional intervals that cannot
+      // be colored, and that we should split.
+    } else {
+      // Spill.
+      node->SetNeedsSpillSlot();
+    }
+  }
+
+  // If unsuccessful, reset all register assignments.
+  if (!successful) {
+    for (LiveInterval* interval : colored_intervals) {
+      interval->ClearRegister();
+    }
+  }
+
+  return successful;
+}
+
+size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
+    const ArenaVector<InterferenceNode*>& safepoints) {
+  size_t max_safepoint_live_regs = 0;
+  for (InterferenceNode* safepoint : safepoints) {
+    DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint());
+    std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes());
+    size_t live_regs = conflict_mask.count();
+    max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs);
+  }
+  return max_safepoint_live_regs;
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
+  // The register allocation resolver will organize the stack based on value type,
+  // so we assign stack slots for each value type separately.
+  ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+  ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // The set of parent intervals already handled.
+  ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  // Find nodes that need spill slots.
+  for (InterferenceNode* node : nodes) {
+    if (!node->NeedsSpillSlot()) {
+      continue;
+    }
+
+    LiveInterval* parent = node->GetInterval()->GetParent();
+    if (seen.find(parent) != seen.end()) {
+      // We've already handled this interval.
+      // This can happen if multiple siblings of the same interval request a stack slot.
+      continue;
+    }
+    seen.insert(parent);
+
+    HInstruction* defined_by = parent->GetDefinedBy();
+    if (parent->HasSpillSlot()) {
+      // We already have a spill slot for this value that we can reuse.
+    } else if (defined_by->IsParameterValue()) {
+      // Parameters already have a stack slot.
+      parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    } else if (defined_by->IsCurrentMethod()) {
+      // The current method is always at stack slot 0.
+      parent->SetSpillSlot(0);
+    } else if (defined_by->IsConstant()) {
+      // Constants don't need a spill slot.
+    } else {
+      // We need to find a spill slot for this interval. Place it in the correct
+      // worklist to be processed later.
+      switch (node->GetInterval()->GetType()) {
+        case Primitive::kPrimDouble:
+          double_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimLong:
+          long_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimFloat:
+          float_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          int_intervals.push_back(parent);
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
+          UNREACHABLE();
+      }
+    }
+  }
+
+  // Color spill slots for each value type.
+  ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
+  ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
+  ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
+  ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
+}
+
+void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+                                                  size_t* num_stack_slots_used) {
+  // We cannot use the original interference graph here because spill slots are assigned to
+  // all of the siblings of an interval, whereas an interference node represents only a single
+  // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
+  // by position, and assigning the lowest spill slot available when we encounter an interval
+  // beginning. We ignore lifetime holes for simplicity.
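+  //
+  // For example, if parent intervals P = [0, 20) and Q = [10, 30) each need one slot, Q begins
+  // while P still holds slot 0, so Q is assigned slot 1; an interval beginning at or after
+  // position 20 could reuse slot 0.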
+  ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+  for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
+    LiveInterval* parent_interval = *it;
+    DCHECK(parent_interval->IsParent());
+    DCHECK(!parent_interval->HasSpillSlot());
+    size_t start = parent_interval->GetStart();
+    size_t end = parent_interval->GetLastSibling()->GetEnd();
+    DCHECK_LT(start, end);
+    interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
+    interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
+  }
+
+  // Sort by position.
+  // We explicitly ignore the third entry of each tuple (the interval pointer) in order
+  // to maintain determinism.
+  std::sort(interval_endpoints.begin(), interval_endpoints.end(),
+            [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
+                const std::tuple<size_t, bool, LiveInterval*>& rhs) {
+    return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+         < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+  });
+
+  ArenaBitVector taken(allocator_, 0, true);
+  for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
+    // Extract information from the current tuple.
+    LiveInterval* parent_interval;
+    bool is_interval_beginning;
+    size_t position;
+    std::tie(position, is_interval_beginning, parent_interval) = *it;
+
+    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+
+    if (is_interval_beginning) {
+      DCHECK(!parent_interval->HasSpillSlot());
+      DCHECK_EQ(position, parent_interval->GetStart());
+
+      // Find a free stack slot.
+      size_t slot = 0;
+      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
+        // Skip taken slots.
+      }
+      parent_interval->SetSpillSlot(slot);
+
+      *num_stack_slots_used = std::max(*num_stack_slots_used,
+                                       needs_two_slots ? slot + 2 : slot + 1);
+      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+        // The parallel move resolver requires that there be an even number of spill slots
+        // allocated for pair value types.
+        ++(*num_stack_slots_used);
+      }
+
+      taken.SetBit(slot);
+      if (needs_two_slots) {
+        taken.SetBit(slot + 1);
+      }
+    } else {
+      DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
+      DCHECK(parent_interval->HasSpillSlot());
+
+      // Free up the stack slot used by this interval.
+      size_t slot = parent_interval->GetSpillSlot();
+      DCHECK(taken.IsBitSet(slot));
+      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
+      taken.ClearBit(slot);
+      if (needs_two_slots) {
+        taken.ClearBit(slot + 1);
+      }
+    }
+  }
+  DCHECK_EQ(taken.NumSetBits(), 0u);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
new file mode 100644
index 0000000..ed12561
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class Location;
+class SsaLivenessAnalysis;
+class InterferenceNode;
+struct CoalesceOpportunity;
+enum class CoalesceKind;
+
+/**
+ * A graph coloring register allocator.
+ *
+ * The algorithm proceeds as follows:
+ * (1) Build an interference graph, where nodes represent live intervals, and edges represent
+ *     interferences between two intervals. Coloring this graph with k colors is isomorphic to
+ *     finding a valid register assignment with k registers.
+ * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
+ *     guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
+ *     different color.) As we prune nodes from the graph, more nodes may drop below degree k,
+ *     enabling further pruning. The key is to maintain the pruning order in a stack, so that we
+ *     can color the nodes in the reverse order.
+ *     When there are no more nodes with degree less than k, we start pruning alternate nodes based
+ *     on heuristics. Since these nodes are not guaranteed a color, we are careful to
+ *     prioritize nodes that require a register. We also prioritize short intervals, because
+ *     short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
+ *     a node amounts to pruning it later, since it will have fewer interferences if we prune other
+ *     nodes first.
+ * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
+ *     a node a color, we do one of two things:
+ *     - If the node requires a register, we consider the current coloring attempt a failure.
+ *       However, we split the node's live interval in order to make the interference graph
+ *       sparser, so that future coloring attempts may succeed.
+ *     - If the node does not require a register, we simply assign it a location on the stack.
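+ *
+ * As a small illustration with k = 2: if intervals a and b interfere, and b and c interfere,
+ * but a and c do not, the interference graph is the chain a - b - c. It is 2-colorable, so a
+ * and c can share one register while b takes the other.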
+ *
+ * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
+ * combine nodes in the graph that would prefer to have the same color. (For example, the output
+ * of a phi instruction would prefer to have the same register as at least one of its inputs.)
+ * There are several additional steps involved with this:
+ * - We look for coalesce opportunities by examining each live interval, a step similar to that
+ *   used by linear scan when looking for register hints.
+ * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
+ *   of low degree nodes that have associated coalesce opportunities. Only when we run out of
+ *   coalesce opportunities do we start pruning coalesce-associated nodes.
+ * - When pruning a node, if any nodes transition from high degree to low degree, we add
+ *   associated coalesce opportunities to the worklist, since these opportunities may now succeed.
+ * - Whether two nodes can be combined is decided by two different heuristics--one used when
+ *   coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
+ *   It is vital that we only combine two nodes if the node that remains is guaranteed to receive
+ *   a color. This is because additional spilling is more costly than failing to coalesce.
+ * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
+ *   to be used as last-chance register hints when coloring. If nothing else, we try to use
+ *   caller-save registers before callee-save registers.
+ *
+ * A good reference for graph coloring register allocation is
+ * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
+ */
+class RegisterAllocatorGraphColor : public RegisterAllocator {
+ public:
+  RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis,
+                              bool iterative_move_coalescing = true);
+  ~RegisterAllocatorGraphColor() OVERRIDE {}
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure);
+
+ private:
+  // Collect all intervals and prepare for register allocation.
+  void ProcessInstructions();
+  void ProcessInstruction(HInstruction* instruction);
+
+  // If any inputs require specific registers, block those registers
+  // at the position of this instruction.
+  void CheckForFixedInputs(HInstruction* instruction);
+
+  // If the output of an instruction requires a specific register, split
+  // the interval and assign the register to the first part.
+  void CheckForFixedOutput(HInstruction* instruction);
+
+  // Add all applicable safepoints to a live interval.
+  // Currently depends on instruction processing order.
+  void AddSafepointsFor(HInstruction* instruction);
+
+  // Collect all live intervals associated with the temporary locations
+  // needed by an instruction.
+  void CheckForTempLiveIntervals(HInstruction* instruction);
+
+  // If a safe point is needed, add a synthesized interval to later record
+  // the number of live registers at this point.
+  void CheckForSafepoint(HInstruction* instruction);
+
+  // Split an interval, but only if `position` is inside of `interval`.
+  // Return either the new interval, or the original interval if not split.
+  static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
+
+  // To ensure every graph can be colored, split live intervals
+  // at their register defs and uses. This creates short intervals with low
+  // degree in the interference graph, which are prioritized during graph
+  // coloring.
+  void SplitAtRegisterUses(LiveInterval* interval);
+
+  // If the given instruction is a catch phi, give it a spill slot.
+  void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
+
+  // Ensure that the given register cannot be allocated for a given range.
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  bool IsCallerSave(size_t reg, bool processing_core_regs);
+
+  // Return the maximum number of registers live at safepoints,
+  // based on the outgoing interference edges of safepoint nodes.
+  size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
+
+  // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
+  // assigned the same stack slot.
+  void ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+                       size_t* num_stack_slots_used);
+
+  // Provide stack slots to nodes that need them.
+  void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes);
+
+  // Whether iterative move coalescing should be performed. Iterative move coalescing
+  // improves code quality, but increases compile time.
+  const bool iterative_move_coalescing_;
+
+  // Live intervals, split by kind (core and floating point).
+  // These should not contain high intervals, as those are represented by
+  // the corresponding low interval throughout register allocation.
+  ArenaVector<LiveInterval*> core_intervals_;
+  ArenaVector<LiveInterval*> fp_intervals_;
+
+  // Intervals for temporaries, saved for special handling in the resolution phase.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // Safepoints, saved for special handling while processing instructions.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // Interference nodes representing specific registers. These are "pre-colored" nodes
+  // in the interference graph.
+  ArenaVector<InterferenceNode*> physical_core_nodes_;
+  ArenaVector<InterferenceNode*> physical_fp_nodes_;
+
+  // Allocated stack slot counters.
+  size_t num_int_spill_slots_;
+  size_t num_double_spill_slots_;
+  size_t num_float_spill_slots_;
+  size_t num_long_spill_slots_;
+  size_t catch_phi_spill_slot_counter_;
+
+  // Number of stack slots needed for the pointer to the current method.
+  // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
+  const size_t reserved_art_method_slots_;
+
+  // Number of stack slots needed for outgoing arguments.
+  const size_t reserved_out_slots_;
+
+  // The number of globally blocked core and floating point registers, such as the stack pointer.
+  size_t number_of_globally_blocked_core_regs_;
+  size_t number_of_globally_blocked_fp_regs_;
+
+  // The maximum number of registers live at safe points. Needed by the code generator.
+  size_t max_safepoint_live_core_regs_;
+  size_t max_safepoint_live_fp_regs_;
+
+  friend class ColoringIteration;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
new file mode 100644
index 0000000..768ed2d
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -0,0 +1,1225 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_linear_scan.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "base/enums.h"
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
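+// For example, on ARM the double-precision register D2 overlaps the pair (S4, S5); the low
+// half of an aligned pair therefore always has an even index.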
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
+
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness)
+      : RegisterAllocator(allocator, codegen, liveness),
+        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_(nullptr),
+        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        catch_phi_spill_slots_(0),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        processing_core_registers_(false),
+        number_of_registers_(-1),
+        registers_array_(nullptr),
+        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
+        reserved_out_slots_(0),
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
+  temp_intervals_.reserve(4);
+  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
+  codegen->SetupBlockedRegisters();
+  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+  // Always reserve for the current method and the graph's max out registers.
+  // TODO: compute it instead.
+  // ArtMethod* takes 2 vregs for 64 bits.
+  size_t ptr_size = static_cast<size_t>(InstructionSetPointerSize(codegen->GetInstructionSet()));
+  reserved_out_slots_ = ptr_size / kVRegSize + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
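+  // For example, on a 64-bit target ptr_size is 8 and kVRegSize is 4, so two slots are
+  // reserved for the ArtMethod* in addition to the outgoing vregs.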
+}
+
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+  if (interval == nullptr) return false;
+  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+      && (interval->GetType() != Primitive::kPrimFloat);
+  return processing_core_registers == is_core_register;
+}
+
+void RegisterAllocatorLinearScan::AllocateRegisters() {
+  AllocateRegistersInternal();
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(maximum_number_of_live_core_registers_,
+               maximum_number_of_live_fp_registers_,
+               reserved_out_slots_,
+               int_spill_slots_.size(),
+               long_spill_slots_.size(),
+               float_spill_slots_.size(),
+               double_spill_slots_.size(),
+               catch_phi_spill_slots_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    processing_core_registers_ = true;
+    ValidateInternal(true);
+    processing_core_registers_ = false;
+    ValidateInternal(true);
+    // Check that the linear order is still correct with regard to lifetime positions.
+    // Since only parallel moves have been inserted during the register allocation,
+    // these checks are mostly for making sure these moves have been added correctly.
+    size_t current_liveness = 0;
+    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+        current_liveness = instruction->GetLifetimePosition();
+      }
+      for (HInstructionIterator inst_it(block->GetInstructions());
+           !inst_it.Done();
+           inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+        current_liveness = instruction->GetLifetimePosition();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+  int reg = location.reg();
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_register_intervals_[reg]
+      : physical_fp_register_intervals_[reg];
+  Primitive::Type type = location.IsRegister()
+      ? Primitive::kPrimInt
+      : Primitive::kPrimFloat;
+  if (interval == nullptr) {
+    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+    if (location.IsRegister()) {
+      physical_core_register_intervals_[reg] = interval;
+    } else {
+      physical_fp_register_intervals_[reg] = interval;
+    }
+  }
+  DCHECK(interval->GetRegister() == reg);
+  interval->AddRange(start, end);
+}
+
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
+  // Iterate post-order, to ensure the list is sorted, and the last added interval
+  // is the one with the lowest start position.
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
+         back_it.Advance()) {
+      ProcessInstruction(back_it.Current());
+    }
+    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      ProcessInstruction(inst_it.Current());
+    }
+
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in registers.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+
+  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = true;
+  unhandled_ = &unhandled_core_intervals_;
+  for (LiveInterval* fixed : physical_core_register_intervals_) {
+    if (fixed != nullptr) {
+      // Fixed interval is added to inactive_ instead of unhandled_.
+      // It's also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // Fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+
+  inactive_.clear();
+  active_.clear();
+  handled_.clear();
+
+  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = false;
+  unhandled_ = &unhandled_fp_intervals_;
+  for (LiveInterval* fixed : physical_fp_register_intervals_) {
+    if (fixed != nullptr) {
+      // Fixed interval is added to inactive_ instead of unhandled_.
+      // It's also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // Fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+}
+
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations == nullptr) return;
+
+  // Create synthesized intervals for temporaries.
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      // Ensure that an explicit temporary register is marked as being allocated.
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          unhandled_core_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/* is_temp */ true);
+            LiveInterval* high = interval->GetHighInterval();
+            temp_intervals_.push_back(high);
+            unhandled_fp_intervals_.push_back(high);
+          }
+          unhandled_fp_intervals_.push_back(interval);
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  if (locations->NeedsSafepoint()) {
+    if (codegen_->IsLeafMethod()) {
+      // TODO: We do this here because we do not want the suspend check to artificially
+      // create live registers. We should find another place, but this is currently the
+      // simplest.
+      DCHECK(instruction->IsSuspendCheckEntry());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires knowing the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      AddSorted(&unhandled_core_intervals_, interval);
+      AddSorted(&unhandled_fp_intervals_, interval);
+    }
+  }
+
+  if (locations->WillCall()) {
+    BlockRegisters(position, position + 1, /* caller_save_only */ true);
+  }
+
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+    }
+  }
+
+  LiveInterval* current = instruction->GetLiveInterval();
+  if (current == nullptr) return;
+
+  ArenaVector<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
+  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
+
+  if (codegen_->NeedsTwoRegisters(current->GetType())) {
+    current->AddHighInterval();
+  }
+
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Test that safepoints are ordered in the optimal way.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == current->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (current->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!current->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    current->AddSafepoint(safepoint);
+  }
+  current->ResetSearchCache();
+
+  // Some instructions define their output in a fixed register or stack slot. We need
+  // to ensure we know these locations before doing register allocation. For a
+  // given register, we create an interval that covers these locations. The register
+  // will be unavailable at these locations when trying to allocate one for an
+  // interval.
+  //
+  // The backward walk ensures the ranges are ordered by increasing start position.
+  Location output = locations->Out();
+  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
+    Location first = locations->InAt(0);
+    if (first.IsRegister() || first.IsFpuRegister()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.reg());
+    } else if (first.IsPair()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.low());
+      LiveInterval* high = current->GetHighInterval();
+      high->SetRegister(first.high());
+      high->SetFrom(position + 1);
+    }
+  } else if (output.IsRegister() || output.IsFpuRegister()) {
+    // Shift the interval's start by one to account for the blocked register.
+    current->SetFrom(position + 1);
+    current->SetRegister(output.reg());
+    BlockRegister(output, position, position + 1);
+  } else if (output.IsPair()) {
+    current->SetFrom(position + 1);
+    current->SetRegister(output.low());
+    LiveInterval* high = current->GetHighInterval();
+    high->SetRegister(output.high());
+    high->SetFrom(position + 1);
+    BlockRegister(output.ToLow(), position, position + 1);
+    BlockRegister(output.ToHigh(), position, position + 1);
+  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
+    current->SetSpillSlot(output.GetStackIndex());
+  } else {
+    DCHECK(output.IsUnallocated() || output.IsConstant());
+  }
+
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+  }
+
+  // If needed, add interval to the list of unhandled intervals.
+  if (current->HasSpillSlot() || instruction->IsConstant()) {
+    // Split just before first register use.
+    size_t first_register_use = current->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      // Don't add directly to `unhandled`, it needs to be sorted and the start
+      // of this new interval might be after intervals already in the list.
+      AddSorted(&unhandled, split);
+    } else {
+      // Nothing to do, we won't allocate a register for this value.
+    }
+  } else {
+    // Don't add directly to `unhandled`, temp or safepoint intervals
+    // for this instruction may have been added, and those can be
+    // processed first.
+    AddSorted(&unhandled, current);
+  }
+}
+
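+// Iterates over all live ranges of an interval and, transitively, of its split siblings.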
+class AllRangesIterator : public ValueObject {
+ public:
+  explicit AllRangesIterator(LiveInterval* interval)
+      : current_interval_(interval),
+        current_range_(interval->GetFirstRange()) {}
+
+  bool Done() const { return current_interval_ == nullptr; }
+  LiveRange* CurrentRange() const { return current_range_; }
+  LiveInterval* CurrentInterval() const { return current_interval_; }
+
+  void Advance() {
+    current_range_ = current_range_->GetNext();
+    if (current_range_ == nullptr) {
+      current_interval_ = current_interval_->GetNextSibling();
+      if (current_interval_ != nullptr) {
+        current_range_ = current_interval_->GetFirstRange();
+      }
+    }
+  }
+
+ private:
+  LiveInterval* current_interval_;
+  LiveRange* current_range_;
+
+  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+  // To simplify unit testing, we eagerly create the array of intervals, and
+  // call the helper method.
+  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+      intervals.push_back(instruction->GetLiveInterval());
+    }
+  }
+
+  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+      ? &physical_core_register_intervals_
+      : &physical_fp_register_intervals_;
+  for (LiveInterval* fixed : *physical_register_intervals) {
+    if (fixed != nullptr) {
+      intervals.push_back(fixed);
+    }
+  }
+
+  for (LiveInterval* temp : temp_intervals_) {
+    if (ShouldProcess(processing_core_registers_, temp)) {
+      intervals.push_back(temp);
+    }
+  }
+
+  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
+                           allocator_, processing_core_registers_, log_fatal_on_failure);
+}
+
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+  interval->Dump(stream);
+  stream << ": ";
+  if (interval->HasRegister()) {
+    if (interval->IsFloatingPoint()) {
+      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+    } else {
+      codegen_->DumpCoreRegister(stream, interval->GetRegister());
+    }
+  } else {
+    stream << "spilled";
+  }
+  stream << std::endl;
+}
+
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
+  stream << "inactive: " << std::endl;
+  for (LiveInterval* inactive_interval : inactive_) {
+    DumpInterval(stream, inactive_interval);
+  }
+  stream << "active: " << std::endl;
+  for (LiveInterval* active_interval : active_) {
+    DumpInterval(stream, active_interval);
+  }
+  stream << "unhandled: " << std::endl;
+  auto unhandled = (unhandled_ != nullptr) ?
+      unhandled_ : &unhandled_core_intervals_;
+  for (LiveInterval* unhandled_interval : *unhandled) {
+    DumpInterval(stream, unhandled_interval);
+  }
+  stream << "handled: " << std::endl;
+  for (LiveInterval* handled_interval : handled_) {
+    DumpInterval(stream, handled_interval);
+  }
+}
+
+// By-the-book implementation of a linear scan register allocator.
+void RegisterAllocatorLinearScan::LinearScan() {
+  while (!unhandled_->empty()) {
+    // (1) Remove interval with the lowest start position from unhandled.
+    LiveInterval* current = unhandled_->back();
+    unhandled_->pop_back();
+
+    // Make sure the interval is in an expected state.
+    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+    // Make sure we are going in the right order.
+    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+    // Make sure a low interval is always with a high.
+    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+    // Make sure a high interval is always with a low.
+    DCHECK(current->IsLowInterval() ||
+           unhandled_->empty() ||
+           !unhandled_->back()->IsHighInterval());
+
+    size_t position = current->GetStart();
+
+    // Remember the inactive_ size here since the ones moved to inactive_ from
+    // active_ below shouldn't need to be re-checked.
+    size_t inactive_intervals_to_handle = inactive_.size();
+
+    // (2) Remove currently active intervals that are dead at this position.
+    //     Move active intervals that have a lifetime hole at this position
+    //     to inactive.
+    auto active_kept_end = std::remove_if(
+        active_.begin(),
+        active_.end(),
+        [this, position](LiveInterval* interval) {
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (!interval->Covers(position)) {
+            inactive_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    active_.erase(active_kept_end, active_.end());
+
+    // (3) Remove currently inactive intervals that are dead at this position.
+    //     Move inactive intervals that cover this position to active.
+    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+    auto inactive_kept_end = std::remove_if(
+        inactive_.begin(),
+        inactive_to_handle_end,
+        [this, position](LiveInterval* interval) {
+          DCHECK(interval->GetStart() < position || interval->IsFixed());
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (interval->Covers(position)) {
+            active_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.size());
+      }
+      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+      continue;
+    }
+
+    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+      DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+      // will be handled next (it is in the `unhandled_` list).
+      continue;
+    }
+
+    // (4) Try to find an available register.
+    bool success = TryAllocateFreeReg(current);
+
+    // (5) If no register could be found, we need to spill.
+    if (!success) {
+      success = AllocateBlockedReg(current);
+    }
+
+    // (6) If the interval had a register allocated, add it to the list of active
+    //     intervals.
+    if (success) {
+      codegen_->AddAllocatedRegister(processing_core_registers_
+          ? Location::RegisterLocation(current->GetRegister())
+          : Location::FpuRegisterLocation(current->GetRegister()));
+      active_.push_back(current);
+      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+      }
+    }
+  }
+}
+
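+// Helper for TryAllocateFreeReg: if `interval` no longer covers `position`, make its
+// register available in `free_until`, either indefinitely (the interval is dead) or
+// until the interval's next use after `position` (lifetime hole).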
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+  DCHECK(!interval->IsHighInterval());
+  // Note that the same instruction may occur multiple times in the input list,
+  // so `free_until` may have changed already.
+  // Since `position` is not the current scan position, we need to use CoversSlow.
+  if (interval->IsDeadAt(position)) {
+    // Set the register to be free. Note that inactive intervals might later
+    // update this.
+    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+    if (interval->HasHighInterval()) {
+      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+    }
+  } else if (!interval->CoversSlow(position)) {
+    // The interval becomes inactive at `defined_by`. We make its register
+    // available only until the next use strictly after `defined_by`.
+    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    if (interval->HasHighInterval()) {
+      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+    }
+  }
+}
+
+// Find a free register. If multiple are found, pick the register that
+// is free the longest.
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
+  size_t* free_until = registers_array_;
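+  // `free_until[i]` will hold the first lifetime position at which register i stops
+  // being available for `current`; 0 means it is already taken by an active interval.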
+
+  // First set all registers to be free.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    free_until[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, set its register to not free.
+  for (LiveInterval* interval : active_) {
+    DCHECK(interval->HasRegister());
+    free_until[interval->GetRegister()] = 0;
+  }
+
+  // An interval that starts at an instruction (that is, it is not split) may
+  // re-use the registers used by the inputs of that instruction, based on the
+  // location summary.
+  HInstruction* defined_by = current->GetDefinedBy();
+  if (defined_by != nullptr && !current->IsSplit()) {
+    LocationSummary* locations = defined_by->GetLocations();
+    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+      HInputsRef inputs = defined_by->GetInputs();
+      for (size_t i = 0; i < inputs.size(); ++i) {
+        // Take the last interval of the input. It is the location of that interval
+        // that will be used at `defined_by`.
+        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+        // Note that the interval may not have been processed yet.
+        // TODO: Handle non-split intervals last in the work list.
+        if (locations->InAt(i).IsValid()
+            && interval->HasRegister()
+            && interval->SameRegisterKind(*current)) {
+          // The input must be live until the end of `defined_by`, to comply with
+          // the linear scan algorithm. So we use `defined_by`'s end lifetime
+          // position to check whether the input is dead or is inactive after
+          // `defined_by`.
+          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+          size_t position = defined_by->GetLifetimePosition() + 1;
+          FreeIfNotCoverAt(interval, position, free_until);
+        }
+      }
+    }
+  }
+
+  // For each inactive interval, set its register to be free until
+  // the next intersection with `current`.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp/Slow-path-safepoint interval has no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive are fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // Only if it's not fixed though, because fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+
+    DCHECK(inactive->HasRegister());
+    if (free_until[inactive->GetRegister()] == 0) {
+      // Already used by some active interval. No need to intersect.
+      continue;
+    }
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      free_until[inactive->GetRegister()] =
+          std::min(free_until[inactive->GetRegister()], next_intersection);
+    }
+  }
+
+  int reg = kNoRegister;
+  if (current->HasRegister()) {
+    // Some instructions have a fixed register output.
+    reg = current->GetRegister();
+    if (free_until[reg] == 0) {
+      DCHECK(current->IsHighInterval());
+      // AllocateBlockedReg will spill the holder of the register.
+      return false;
+    }
+  } else {
+    DCHECK(!current->IsHighInterval());
+    int hint = current->FindFirstRegisterHint(free_until, liveness_);
+    if ((hint != kNoRegister)
+        // For simplicity, if the hint we are getting for a pair cannot be used,
+        // we are just going to allocate a new pair.
+        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
+      DCHECK(!IsBlocked(hint));
+      reg = hint;
+    } else if (current->IsLowInterval()) {
+      reg = FindAvailableRegisterPair(free_until, current->GetStart());
+    } else {
+      reg = FindAvailableRegister(free_until, current);
+    }
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  // If we could not find a register, we need to spill.
+  if (free_until[reg] == 0) {
+    return false;
+  }
+
+  if (current->IsLowInterval()) {
+    // If the high register of this interval is not available, we need to spill.
+    int high_reg = current->GetHighInterval()->GetRegister();
+    if (high_reg == kNoRegister) {
+      high_reg = GetHighForLowRegister(reg);
+    }
+    if (free_until[high_reg] == 0) {
+      return false;
+    }
+  }
+
+  current->SetRegister(reg);
+  if (!current->IsDeadAt(free_until[reg])) {
+    // If the register is only available for a subset of live ranges
+    // covered by `current`, split `current` before the position where
+    // the register is not available anymore.
+    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
+    DCHECK(split != nullptr);
+    AddSorted(unhandled_, split);
+  }
+  return true;
+}
+
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
+  return processing_core_registers_
+      ? blocked_core_registers_[reg]
+      : blocked_fp_registers_[reg];
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+  int reg = kNoRegister;
+  // Pick the register pair that is used the last.
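+  // `next_use[i]` is either the free-until position (when called from TryAllocateFreeReg)
+  // or the next use position (when called from AllocateBlockedReg); in both cases, a
+  // larger value means register i stays available longer.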
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) continue;
+    if (!IsLowRegister(i)) continue;
+    int high_register = GetHighForLowRegister(i);
+    if (IsBlocked(high_register)) continue;
+    int existing_high_register = GetHighForLowRegister(reg);
+    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+                        && next_use[high_register] >= next_use[existing_high_register])) {
+      reg = i;
+      if (next_use[i] == kMaxLifetimePosition
+          && next_use[high_register] == kMaxLifetimePosition) {
+        break;
+      }
+    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+      // If one of the current registers is known to be unavailable, just unconditionally
+      // try a new one.
+      reg = i;
+    }
+  }
+  return reg;
+}
+
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
+  return processing_core_registers_
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+  // We special case intervals that do not span a safepoint to try to find a caller-save
+  // register if one is available. We iterate from 0 to the number of registers,
+  // so if there are caller-save registers available at the end, we continue the iteration.
+  bool prefers_caller_save = !current->HasWillCallSafepoint();
+  int reg = kNoRegister;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) {
+      // Register cannot be used. Continue.
+      continue;
+    }
+
+    // Best case: we found a register fully available.
+    if (next_use[i] == kMaxLifetimePosition) {
+      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+        // We can get shorter encodings on some platforms by using
+        // small register numbers. So only update the candidate if the previous
+        // one was not available for the whole method.
+        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+          reg = i;
+        }
+        // Continue the iteration in the hope of finding a caller save register.
+        continue;
+      } else {
+        reg = i;
+        // We know the register is good enough. Return it.
+        break;
+      }
+    }
+
+    // If we had no register before, take this one as a reference.
+    if (reg == kNoRegister) {
+      reg = i;
+      continue;
+    }
+
+    // Pick the register that is used the last.
+    if (next_use[i] > next_use[reg]) {
+      reg = i;
+      continue;
+    }
+  }
+  return reg;
+}
+
+// Remove interval and its other half if any. Return iterator to the following element.
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+  DCHECK(intervals->begin() <= pos && pos < intervals->end());
+  LiveInterval* interval = *pos;
+  if (interval->IsLowInterval()) {
+    DCHECK(pos + 1 < intervals->end());
+    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+    return intervals->erase(pos, pos + 2);
+  } else if (interval->IsHighInterval()) {
+    DCHECK(intervals->begin() < pos);
+    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+    return intervals->erase(pos - 1, pos + 1);
+  } else {
+    return intervals->erase(pos);
+  }
+}
+
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                                           size_t first_register_use,
+                                                                           size_t* next_use) {
+  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+    LiveInterval* active = *it;
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) continue;
+    if (active->IsHighInterval()) continue;
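+    // Skip intervals whose register is needed again before `current`'s first register
+    // use; splitting them would not make the register available long enough.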
+    if (first_register_use > next_use[active->GetRegister()]) continue;
+
+    // Split the first interval found that is either:
+    // 1) A non-pair interval.
+    // 2) A pair interval whose high is not low + 1.
+    // 3) A pair interval whose low is not even.
+    if (!active->IsLowInterval() ||
+        IsLowOfUnalignedPairInterval(active) ||
+        !IsLowRegister(active->GetRegister())) {
+      LiveInterval* split = Split(active, position);
+      if (split != active) {
+        handled_.push_back(active);
+      }
+      RemoveIntervalAndPotentialOtherHalf(&active_, it);
+      AddSorted(unhandled_, split);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Find the register that is used the last, and spill the interval
+// that holds it. If the first use of `current` is after that register
+// we spill `current` instead.
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
+  size_t first_register_use = current->FirstRegisterUse();
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    // The low interval has allocated the register for the high interval. In
+    // case the low interval had to split both intervals, we may end up in a
+    // situation where the high interval does not have a register use anymore.
+    // We must still proceed in order to split currently active and inactive
+    // uses of the high interval's register, and put the high interval in the
+    // active set.
+    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
+  } else if (first_register_use == kNoLifetime) {
+    AllocateSpillSlotFor(current);
+    return false;
+  }
+
+  // First set all registers as not being used.
+  size_t* next_use = registers_array_;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    next_use[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* active : active_) {
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) {
+      next_use[active->GetRegister()] = current->GetStart();
+    } else {
+      size_t use = active->FirstRegisterUseAfter(current->GetStart());
+      if (use != kNoLifetime) {
+        next_use[active->GetRegister()] = use;
+      }
+    }
+  }
+
+  // For each inactive interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp/Slow-path-safepoint interval has no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive are fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // Only if it's not fixed though, because fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+    DCHECK(inactive->HasRegister());
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      if (inactive->IsFixed()) {
+        next_use[inactive->GetRegister()] =
+            std::min(next_intersection, next_use[inactive->GetRegister()]);
+      } else {
+        size_t use = inactive->FirstUseAfter(current->GetStart());
+        if (use != kNoLifetime) {
+          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+        }
+      }
+    }
+  }
+
+  int reg = kNoRegister;
+  bool should_spill = false;
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    reg = current->GetRegister();
+    // When allocating the low part, we made sure the high register was available.
+    DCHECK_LT(first_register_use, next_use[reg]);
+  } else if (current->IsLowInterval()) {
+    reg = FindAvailableRegisterPair(next_use, first_register_use);
+    // We should spill if either register of the pair is not available.
+    should_spill = (first_register_use >= next_use[reg])
+      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+  } else {
+    DCHECK(!current->IsHighInterval());
+    reg = FindAvailableRegister(next_use, current);
+    should_spill = (first_register_use >= next_use[reg]);
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  if (should_spill) {
+    DCHECK(!current->IsHighInterval());
+    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+    if (is_allocation_at_use_site) {
+      if (!current->IsLowInterval()) {
+        DumpInterval(std::cerr, current);
+        DumpAllIntervals(std::cerr);
+        // This situation can lead to an infinite loop, so we make it a non-debug CHECK.
+        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+        CHECK(false) << "There is not enough registers available for "
+          << current->GetParent()->GetDefinedBy()->DebugName() << " "
+          << current->GetParent()->GetDefinedBy()->GetId()
+          << " at " << first_register_use - 1 << " "
+          << (at == nullptr ? "" : at->DebugName());
+      }
+
+      // If we're allocating a register for `current` because the instruction at
+      // that position requires it, but we think we should spill, then there are
+      // non-pair intervals or unaligned pair intervals blocking the allocation.
+      // We split the first interval found, and put ourselves first in the
+      // `unhandled_` list.
+      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+                                                              first_register_use,
+                                                              next_use);
+      DCHECK(success);
+      LiveInterval* existing = unhandled_->back();
+      DCHECK(existing->IsHighInterval());
+      DCHECK_EQ(existing->GetLowInterval(), current);
+      unhandled_->push_back(current);
+    } else {
+      // If the first use of that instruction is after the last use of the found
+      // register, we split this interval just before its first register use.
+      AllocateSpillSlotFor(current);
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      DCHECK(current != split);
+      AddSorted(unhandled_, split);
+    }
+    return false;
+  } else {
+    // Use this register and spill the active and inactive intervals that
+    // hold that register.
+    current->SetRegister(reg);
+
+    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+      LiveInterval* active = *it;
+      if (active->GetRegister() == reg) {
+        DCHECK(!active->IsFixed());
+        LiveInterval* split = Split(active, current->GetStart());
+        if (split != active) {
+          handled_.push_back(active);
+        }
+        RemoveIntervalAndPotentialOtherHalf(&active_, it);
+        AddSorted(unhandled_, split);
+        break;
+      }
+    }
+
+    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+      LiveInterval* inactive = *it;
+      bool erased = false;
+      if (inactive->GetRegister() == reg) {
+        if (!current->IsSplit() && !inactive->IsFixed()) {
+          // Neither current nor inactive are fixed.
+          // Thanks to SSA, a non-split interval starting in a hole of an
+          // inactive interval should never intersect with that inactive interval.
+          // Only if it's not fixed though, because fixed intervals don't come from SSA.
+          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+        } else {
+          size_t next_intersection = inactive->FirstIntersectionWith(current);
+          if (next_intersection != kNoLifetime) {
+            if (inactive->IsFixed()) {
+              LiveInterval* split = Split(current, next_intersection);
+              DCHECK_NE(split, current);
+              AddSorted(unhandled_, split);
+            } else {
+              // Split at the start of `current`, which will lead to splitting
+              // at the end of the lifetime hole of `inactive`.
+              LiveInterval* split = Split(inactive, current->GetStart());
+              // If it's inactive, it must start before the current interval.
+              DCHECK_NE(split, inactive);
+              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+              erased = true;
+              handled_.push_back(inactive);
+              AddSorted(unhandled_, split);
+            }
+          }
+        }
+      }
+      // If we have erased the element, `it` already points to the next element.
+      // Otherwise we need to move to the next element.
+      if (!erased) {
+        ++it;
+      }
+    }
+
+    return true;
+  }
+}
+
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
+  size_t insert_at = 0;
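+  // `array` is sorted by decreasing start position (the interval with the lowest start
+  // position is at the back), so we scan from the back to find the insertion point.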
+  for (size_t i = array->size(); i > 0; --i) {
+    LiveInterval* current = (*array)[i - 1u];
+    // High intervals must be processed right after their low equivalent.
+    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
+      insert_at = i;
+      break;
+    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+      // Ensure the slow path interval is the last to be processed at its location: we want the
+      // interval to know all live registers at this location.
+      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
+      insert_at = i;
+      break;
+    }
+  }
+
+  // Insert the high interval before the low in the vector, so that the low is
+  // processed first (the list is consumed from the back).
+  auto insert_pos = array->begin() + insert_at;
+  if (interval->HasHighInterval()) {
+    array->insert(insert_pos, { interval->GetHighInterval(), interval });
+  } else if (interval->HasLowInterval()) {
+    array->insert(insert_pos, { interval, interval->GetLowInterval() });
+  } else {
+    array->insert(insert_pos, interval);
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
+  if (interval->IsHighInterval()) {
+    // The low interval already took care of allocating the spill slot.
+    DCHECK(!interval->GetLowInterval()->HasRegister());
+    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+
+  // An instruction gets a spill slot for its entire lifetime. If the parent
+  // of this interval already has a spill slot, there is nothing to do.
+  if (parent->HasSpillSlot()) {
+    return;
+  }
+
+  HInstruction* defined_by = parent->GetDefinedBy();
+  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
+  if (defined_by->IsParameterValue()) {
+    // Parameters have their own stack slot.
+    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    return;
+  }
+
+  if (defined_by->IsCurrentMethod()) {
+    parent->SetSpillSlot(0);
+    return;
+  }
+
+  if (defined_by->IsConstant()) {
+    // Constants don't need a spill slot.
+    return;
+  }
+
+  ArenaVector<size_t>* spill_slots = nullptr;
+  switch (interval->GetType()) {
+    case Primitive::kPrimDouble:
+      spill_slots = &double_spill_slots_;
+      break;
+    case Primitive::kPrimLong:
+      spill_slots = &long_spill_slots_;
+      break;
+    case Primitive::kPrimFloat:
+      spill_slots = &float_spill_slots_;
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimShort:
+      spill_slots = &int_spill_slots_;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+  }
+
+  // Find an available spill slot.
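+  // Each entry records the lifetime position up to which the slot is in use; a slot can
+  // be reused once that position is not after the start of `parent`.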
+  size_t slot = 0;
+  for (size_t e = spill_slots->size(); slot < e; ++slot) {
+    if ((*spill_slots)[slot] <= parent->GetStart()) {
+      if (!parent->NeedsTwoSpillSlots()) {
+        // One spill slot is sufficient.
+        break;
+      }
+      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+        // Two spill slots are available.
+        break;
+      }
+    }
+  }
+
+  size_t end = interval->GetLastSibling()->GetEnd();
+  if (parent->NeedsTwoSpillSlots()) {
+    if (slot + 2u > spill_slots->size()) {
+      // We need new spill slots.
+      spill_slots->resize(slot + 2u, end);
+    }
+    (*spill_slots)[slot] = end;
+    (*spill_slots)[slot + 1] = end;
+  } else {
+    if (slot == spill_slots->size()) {
+      // We need a new spill slot.
+      spill_slots->push_back(end);
+    } else {
+      (*spill_slots)[slot] = end;
+    }
+  }
+
+  // Note that the exact spill slot location will be computed when we resolve,
+  // that is when we know the number of spill slots for each type.
+  parent->SetSpillSlot(slot);
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+  LiveInterval* interval = phi->GetLiveInterval();
+
+  HInstruction* previous_phi = phi->GetPrevious();
+  DCHECK(previous_phi == nullptr ||
+         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+  if (phi->IsVRegEquivalentOf(previous_phi)) {
+    // This is an equivalent of the previous phi. We need to assign the same
+    // catch phi slot.
+    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+  } else {
+    // Allocate a new spill slot for this catch phi.
+    // TODO: Reuse spill slots when intervals of phis from different catch
+    //       blocks do not overlap.
+    interval->SetSpillSlot(catch_phi_spill_slots_);
+    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
new file mode 100644
index 0000000..1a643a0
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class HPhi;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ */
+class RegisterAllocatorLinearScan : public RegisterAllocator {
+ public:
+  RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis);
+  ~RegisterAllocatorLinearScan() OVERRIDE {}
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure) OVERRIDE {
+    processing_core_registers_ = true;
+    if (!ValidateInternal(log_fatal_on_failure)) {
+      return false;
+    }
+    processing_core_registers_ = false;
+    return ValidateInternal(log_fatal_on_failure);
+  }
+
+  size_t GetNumberOfSpillSlots() const {
+    return int_spill_slots_.size()
+        + long_spill_slots_.size()
+        + float_spill_slots_.size()
+        + double_spill_slots_.size()
+        + catch_phi_spill_slots_;
+  }
+
+ private:
+  // Main methods of the allocator.
+  void LinearScan();
+  bool TryAllocateFreeReg(LiveInterval* interval);
+  bool AllocateBlockedReg(LiveInterval* interval);
+
+  // Add `interval` in the given sorted list.
+  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+
+  // Returns whether `reg` is blocked by the code generator.
+  bool IsBlocked(int reg) const;
+
+  // Update the interval for the register in `location` to cover [start, end).
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  // Allocate a spill slot for the given interval. Should be called in linear
+  // order of interval starting positions.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
+  // Allocate a spill slot for the given catch phi. Will allocate the same slot
+  // for phis which share the same vreg. Must be called in reverse linear order
+  // of lifetime positions and ascending vreg numbers for correctness.
+  void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
+  // Helper methods.
+  void AllocateRegistersInternal();
+  void ProcessInstruction(HInstruction* instruction);
+  bool ValidateInternal(bool log_fatal_on_failure) const;
+  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
+  void DumpAllIntervals(std::ostream& stream) const;
+  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+  bool IsCallerSaveRegister(int reg) const;
+
+  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
+  // Returns whether it was successful at finding such an interval.
+  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                size_t first_register_use,
+                                                size_t* next_use);
+
+  // List of intervals for core registers that must be processed, ordered by start
+  // position. Last entry is the interval that has the lowest start position.
+  // This list is initially populated before doing the linear scan.
+  ArenaVector<LiveInterval*> unhandled_core_intervals_;
+
+  // List of intervals for floating-point registers. Same comments as above.
+  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
+
+  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
+  // or `unhandled_fp_intervals_`.
+  ArenaVector<LiveInterval*>* unhandled_;
+
+  // List of intervals that have been processed.
+  ArenaVector<LiveInterval*> handled_;
+
+  // List of intervals that are currently active when processing a new live interval.
+  // That is, they have a live range that spans the start of the new interval.
+  ArenaVector<LiveInterval*> active_;
+
+  // List of intervals that are currently inactive when processing a new live interval.
+  // That is, they have a lifetime hole that spans the start of the new interval.
+  ArenaVector<LiveInterval*> inactive_;
+
+  // Fixed intervals for physical registers. Such intervals cover the positions
+  // where an instruction requires a specific register.
+  ArenaVector<LiveInterval*> physical_core_register_intervals_;
+  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
+
+  // Intervals for temporaries. Such intervals cover the positions
+  // where an instruction requires a temporary.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // The spill slots allocated for live intervals. We ensure spill slots
+  // are typed to avoid (1) doing moves and swaps between two different kinds
+  // of registers, and (2) swapping between a single stack slot and a double
+  // stack slot. This simplifies the parallel move resolver.
+  ArenaVector<size_t> int_spill_slots_;
+  ArenaVector<size_t> long_spill_slots_;
+  ArenaVector<size_t> float_spill_slots_;
+  ArenaVector<size_t> double_spill_slots_;
+
+  // Spill slots allocated to catch phis. This category is special-cased because
+  // (1) slots are allocated prior to linear scan and in reverse linear order,
+  // (2) equivalent phis need to share slots despite having different types.
+  size_t catch_phi_spill_slots_;
+
+  // Instructions that need a safepoint.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // True if processing core registers. False if processing floating
+  // point registers.
+  bool processing_core_registers_;
+
+  // Number of registers for the current register kind (core or floating point).
+  size_t number_of_registers_;
+
+  // Temporary array, allocated ahead of time for simplicity.
+  size_t* registers_array_;
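+  // It serves as the `free_until` / `next_use` scratch space during allocation.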
+
+  // Blocked registers, as decided by the code generator.
+  bool* const blocked_core_registers_;
+  bool* const blocked_fp_registers_;
+
+  // Slots reserved for out arguments.
+  size_t reserved_out_slots_;
+
+  // The maximum live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
+
+  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a9de7c3..55ea99e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,17 +25,35 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
+using Strategy = RegisterAllocator::Strategy;
+
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
-class RegisterAllocatorTest : public CommonCompilerTest {};
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+  // These functions need to access private variables of LocationSummary, so we declare them
+  // as members of RegisterAllocatorTest, which we make a friend class.
+  static void SameAsFirstInputHint(Strategy strategy);
+  static void ExpectedInRegisterHint(Strategy strategy);
+};
 
-static bool Check(const uint16_t* data) {
+// This macro should include all register allocation strategies that should be tested.
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+  test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+  test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
+
+static bool Check(const uint16_t* data, Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateCFG(&allocator, data);
@@ -44,9 +62,10 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  return register_allocator.Validate(false);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  return register_allocator->Validate(false);
 }
 
 /**
@@ -142,7 +161,7 @@
   }
 }
 
-TEST_F(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -159,10 +178,12 @@
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -198,10 +219,12 @@
     Instruction::CONST_4 | 5 << 12 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -247,10 +270,12 @@
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop3) {
+TEST_ALL_STRATEGIES(Loop2);
+
+static void Loop3(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -295,9 +320,10 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 
   HBasicBlock* loop_header = graph->GetBlocks()[2];
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -313,6 +339,8 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
+TEST_ALL_STRATEGIES(Loop3);
+
 TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -353,7 +381,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST_F(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -384,15 +412,19 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 }
 
+TEST_ALL_STRATEGIES(DeadPhi);
+
 /**
  * Test that the TryAllocateFreeReg method works in the presence of inactive intervals
  * that share the same register. It should split the interval it is currently
  * allocating for at the minimum lifetime position between the two inactive intervals.
+ * This test only applies to the linear scan allocator.
  */
 TEST_F(RegisterAllocatorTest, FreeUntil) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
@@ -408,7 +440,7 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
 
   // Add an artifical range to cover the temps that will be put in the unhandled list.
   LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -506,15 +538,15 @@
                                               graph->GetDexFile(),
                                               dex_cache,
                                               0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
-                                            Primitive::kPrimInt,
-                                            MemberOffset(42),
-                                            false,
-                                            kUnknownFieldIndex,
-                                            kUnknownClassDefIndex,
-                                            graph->GetDexFile(),
-                                            dex_cache,
-                                            0);
+  *input2 = new (allocator) HInstanceFieldGet(parameter,
+                                              Primitive::kPrimInt,
+                                              MemberOffset(42),
+                                              false,
+                                              kUnknownFieldIndex,
+                                              kUnknownClassDefIndex,
+                                              graph->GetDexFile(),
+                                              dex_cache,
+                                              0);
   then->AddInstruction(*input1);
   else_->AddInstruction(*input2);
   join->AddInstruction(new (allocator) HExit());
@@ -526,7 +558,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -541,8 +573,9 @@
     liveness.Analyze();
 
     // Check that the register allocator is deterministic.
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -560,8 +593,9 @@
     // Set the phi to a specific register, and check that the inputs get allocated
     // the same register.
     phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -579,8 +613,9 @@
     // Set input1 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -598,8 +633,9 @@
     // Set input2 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -607,6 +643,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+  PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
                                 HInstruction** field,
                                 HInstruction** ret) {
@@ -645,7 +687,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -658,8 +700,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -677,13 +720,20 @@
     // Don't use SetInAt because we are overriding an already allocated location.
     ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+  ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
                             HInstruction** first_sub,
                             HInstruction** second_sub) {
@@ -713,7 +763,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -726,8 +776,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -748,14 +799,21 @@
     ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
     ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+  SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildDiv(ArenaAllocator* allocator,
                         HInstruction** div) {
   HGraph* graph = CreateGraph(allocator);
@@ -782,7 +840,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -795,17 +853,25 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
 
     // div on x86 requires its first input in eax and its output to be the same as the first input.
     ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+  ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
+// This test only applies to the linear scan allocator.
 TEST_F(RegisterAllocatorTest, SpillInactive) {
   ArenaPool pool;
 
@@ -892,7 +958,7 @@
     liveness.instructions_from_lifetime_position_.push_back(user);
   }
 
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
   register_allocator.unhandled_core_intervals_.push_back(fourth);
   register_allocator.unhandled_core_intervals_.push_back(third);
   register_allocator.unhandled_core_intervals_.push_back(second);
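The register allocator test changes above route each test body through RegisterAllocator::Create with an explicit strategy. As a rough sketch, TEST_ALL_STRATEGIES(DeadPhi) presumably expands to one gtest wrapper per strategy along these lines (the macro itself is defined earlier in the test file and is not shown in this hunk; the graph-coloring enumerator name is assumed):

    TEST_F(RegisterAllocatorTest, DeadPhi_LinearScan) {
      DeadPhi(Strategy::kRegisterAllocatorLinearScan);
    }
    TEST_F(RegisterAllocatorTest, DeadPhi_GraphColor) {
      DeadPhi(Strategy::kRegisterAllocatorGraphColor);  // assumed enumerator name
    }

The hint tests keep hand-written *_LinearScan wrappers instead, per the TODOs, until iterative move coalescing lands for the graph coloring allocator.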
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index e52476e..e409035 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -96,10 +96,10 @@
     // TODO(dbrazdil): This puts an instruction between If and its condition.
     //                 Implement moving of conditions to first users if possible.
     if (!true_block->IsSingleGoto()) {
-      true_block->MoveInstructionBefore(true_block->GetFirstInstruction(), if_instruction);
+      true_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
     if (!false_block->IsSingleGoto()) {
-      false_block->MoveInstructionBefore(false_block->GetFirstInstruction(), if_instruction);
+      false_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
     DCHECK(true_block->IsSingleGoto());
     DCHECK(false_block->IsSingleGoto());
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 08bd35f..6effc30 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -17,6 +17,7 @@
 #include "sharpening.h"
 
 #include "base/casts.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "code_generator.h"
 #include "driver/dex_compilation_unit.h"
@@ -40,13 +41,14 @@
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
         ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+      } else if (instruction->IsLoadClass()) {
+        ProcessLoadClass(instruction->AsLoadClass());
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
       // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
       //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
       //       because we know the type better when inlining.
-      // TODO: HLoadClass - select better load kind if available.
     }
   }
 }
@@ -153,6 +155,123 @@
   invoke->SetDispatchInfo(dispatch_info);
 }
 
+void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
+  if (load_class->NeedsAccessCheck()) {
+    // We need to call the runtime anyway, so we simply get the class as that call's return value.
+    return;
+  }
+  if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+    // Loading from the ArtMethod* is the most efficient retrieval.
+    // TODO: This may not actually be true for all architectures and
+    // locations of target classes. The additional register pressure
+    // for using the ArtMethod* should be considered.
+    return;
+  }
+
+  DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening.";
+
+  const DexFile& dex_file = load_class->GetDexFile();
+  uint32_t type_index = load_class->GetTypeIndex();
+
+  bool is_in_dex_cache = false;
+  HLoadClass::LoadKind desired_load_kind;
+  uint64_t address = 0u;  // Class or dex cache element address.
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+        ? compilation_unit_.GetDexCache()
+        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+    mirror::Class* klass = dex_cache->GetResolvedType(type_index);
+
+    if (compiler_driver_->IsBootImage()) {
+      // Compiling boot image. Check if the class is a boot image class.
+      DCHECK(!runtime->UseJitCompilation());
+      if (!compiler_driver_->GetSupportBootImageFixup()) {
+        // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+      } else {
+        if (klass != nullptr &&
+            compiler_driver_->IsImageClass(
+                dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+          is_in_dex_cache = true;
+          desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+              ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+              : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+        } else {
+          // Not a boot image class. We must go through the dex cache.
+          DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+          desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+        }
+      }
+    } else if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      is_in_dex_cache = (klass != nullptr);
+      if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Note: If the class is not in the dex cache or isn't initialized, the
+        // instruction needs environment and will not be inlined across dex files.
+        // Within a dex file, the slow-path helper loads the correct class and
+        // inlined frames are used correctly for OOM stack trace.
+        // TODO: Write a test for this. Bug: 29416588
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
+        void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
+        address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+      }
+    } else {
+      // AOT app compilation. Check if the class is in the boot image.
+      if ((klass != nullptr) &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(klass) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
+        // Use PC-relative load from the dex cache if the dex file belongs
+        // to the oat file that we're currently compiling.
+        desired_load_kind =
+            ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile())
+                ? HLoadClass::LoadKind::kDexCachePcRelative
+                : HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+    }
+  }
+  if (is_in_dex_cache) {
+    load_class->MarkInDexCache();
+  }
+
+  HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind);
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK_NE(address, 0u);
+      load_class->SetLoadKindWithAddress(load_kind, address);
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      DexCacheArraysLayout layout(pointer_size, &dex_file);
+      size_t element_index = layout.TypeOffset(type_index);
+      load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load_kind;
+      UNREACHABLE();
+  }
+}
+
 void HSharpening::ProcessLoadString(HLoadString* load_string) {
   DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
   DCHECK(!load_string->IsInDexCache());
@@ -160,8 +279,7 @@
   const DexFile& dex_file = load_string->GetDexFile();
   uint32_t string_index = load_string->GetStringIndex();
 
-  bool is_in_dex_cache = false;
-  HLoadString::LoadKind desired_load_kind;
+  HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
   uint64_t address = 0u;  // String or dex cache element address.
   {
     Runtime* runtime = Runtime::Current();
@@ -177,56 +295,26 @@
       DCHECK(!runtime->UseJitCompilation());
       mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
-      if (!compiler_driver_->GetSupportBootImageFixup()) {
-        // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
-      } else {
-        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
-        is_in_dex_cache = true;
-        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
-            ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
-            : HLoadString::LoadKind::kBootImageLinkTimeAddress;
-      }
+      // TODO: In a follow-up CL, add PcRelative and Address back in.
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
       mirror::String* string = dex_cache->GetResolvedString(string_index);
-      is_in_dex_cache = (string != nullptr);
       if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
         desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(string);
-      } else {
-        // Note: If the string is not in the dex cache, the instruction needs environment
-        // and will not be inlined across dex files. Within a dex file, the slow-path helper
-        // loads the correct string and inlined frames are used correctly for OOM stack trace.
-        // TODO: Write a test for this.
-        desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
-        void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
-        address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
       mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
-      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        if (codegen_->GetCompilerOptions().GetCompilePic()) {
-          // Use PC-relative load from the dex cache if the dex file belongs
-          // to the oat file that we're currently compiling.
-          desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
-              ? HLoadString::LoadKind::kDexCachePcRelative
-              : HLoadString::LoadKind::kDexCacheViaMethod;
-        } else {
-          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-          address = reinterpret_cast64<uint64_t>(string);
-        }
-      } else {
-        // Not JIT and the string is not in boot image.
-        desired_load_kind = HLoadString::LoadKind::kDexCachePcRelative;
+      if (string != nullptr &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(string);
       }
     }
   }
-  if (is_in_dex_cache) {
-    load_string->MarkInDexCache();
-  }
 
   HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
   switch (load_kind) {
@@ -241,7 +329,7 @@
       load_string->SetLoadKindWithAddress(load_kind, address);
       break;
     case HLoadString::LoadKind::kDexCachePcRelative: {
-      size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
       DexCacheArraysLayout layout(pointer_size, &dex_file);
       size_t element_index = layout.StringOffset(string_index);
       load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
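The new HLoadClass sharpening above boils down to the following selection (a paraphrase of the code in this change, not extra behavior):

    // Boot image compile:  image class          -> kBootImageLinkTime{Address,PcRelative}
    //                      otherwise            -> kDexCachePcRelative (or kDexCacheViaMethod
    //                                              when boot-image fixup is unsupported)
    // JIT compile:         class in boot image  -> kBootImageAddress (direct class pointer)
    //                      otherwise            -> kDexCacheAddress (dex cache element address)
    // AOT app compile:     boot-image class and
    //                      non-PIC              -> kBootImageAddress
    //                      otherwise            -> kDexCachePcRelative or kDexCacheViaMethod
    // The chosen kind is then filtered through codegen_->GetSupportedLoadClassKind().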
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 24152f6..d35ae66 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,7 @@
 
  private:
   void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+  void ProcessLoadClass(HLoadClass* load_class);
   void ProcessLoadString(HLoadString* load_string);
 
   CodeGenerator* codegen_;
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index 9bbc354..b01bc1c 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -148,19 +148,19 @@
   EXPECT_FALSE(any_write.MayDependOn(volatile_read));
 }
 
-TEST(SideEffectsTest, SameWidthTypes) {
+TEST(SideEffectsTest, SameWidthTypesNoAlias) {
   // Type I/F.
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false),
       SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
       SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   // Type L/D.
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false),
       SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
 }
@@ -216,14 +216,32 @@
       "||||||L|",
       SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str());
   EXPECT_STREQ(
+      "||DFJISCBZL|DFJISCBZL||DFJISCBZL|DFJISCBZL|",
+      SideEffects::FieldWriteOfType(Primitive::kPrimNot, true).ToString().c_str());
+  EXPECT_STREQ(
       "|||||Z||",
       SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str());
   EXPECT_STREQ(
+      "|||||C||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimChar).ToString().c_str());
+  EXPECT_STREQ(
+      "|||||S||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimShort).ToString().c_str());
+  EXPECT_STREQ(
       "|||B||||",
       SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str());
   EXPECT_STREQ(
-      "||DJ|||||",  // note: DJ alias
+      "||D|||||",
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
+  EXPECT_STREQ(
+      "||J|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimLong).ToString().c_str());
+  EXPECT_STREQ(
+      "||F|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimFloat).ToString().c_str());
+  EXPECT_STREQ(
+      "||I|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimInt).ToString().c_str());
   SideEffects s = SideEffects::None();
   s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false));
   s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false));
@@ -231,9 +249,7 @@
   s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
-  EXPECT_STREQ(
-      "||DFJI|FI||S|DJC|",   // note: DJ/FI alias.
-      s.ToString().c_str());
+  EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 52ee3fb..5a574d9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -123,8 +123,7 @@
 static bool TypePhiFromInputs(HPhi* phi) {
   Primitive::Type common_type = phi->GetType();
 
-  for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-    HInstruction* input = it.Current();
+  for (HInstruction* input : phi->GetInputs()) {
     if (input->IsPhi() && input->AsPhi()->IsDead()) {
       // Phis are constructed live so if an input is a dead phi, it must have
       // been made dead due to type conflict. Mark this phi conflicting too.
@@ -169,8 +168,7 @@
     // or `common_type` is integral and we do not need to retype ambiguous inputs
     // because they are always constructed with the integral type candidate.
     if (kIsDebugBuild) {
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
+      for (HInstruction* input : phi->GetInputs()) {
         if (common_type == Primitive::kPrimVoid) {
           DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
         } else {
@@ -183,8 +181,9 @@
     return true;
   } else {
     DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
+    HInputsRef inputs = phi->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      HInstruction* input = inputs[i];
       if (input->GetType() != common_type) {
         // Input type does not match phi's type. Try to retype the input or
         // generate a suitably typed equivalent.
@@ -391,6 +390,9 @@
             worklist.push_back(equivalent->AsPhi());
           }
         }
+        // Refine the side effects of this floating point aset. Note that we do this even if
+        // no replacement occurs, since the right-hand-side may have been corrected already.
+        aset->ComputeSideEffects();
       } else {
         // Array elements are integral and the value assigned to it initially
         // was integral too. Nothing to do.
@@ -615,11 +617,14 @@
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
     ArenaAllocator* allocator = graph_->GetArena();
-    HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed
-      // by doing this copy, but the type propagation phase will fix it.
-      new_phi->SetRawInputAt(i, phi->InputAt(i));
+    HInputsRef inputs = phi->GetInputs();
+    HPhi* new_phi =
+        new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
+    // Copy the inputs. Note that the graph may not be correctly typed
+    // by doing this copy, but the type propagation phase will fix it.
+    ArrayRef<HUserRecord<HInstruction*>> new_input_records = new_phi->GetInputRecords();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      new_input_records[i] = HUserRecord<HInstruction*>(inputs[i]);
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     DCHECK(new_phi->IsLive());
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 36e0d99..a01e107 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -177,8 +177,9 @@
 static void RecursivelyProcessInputs(HInstruction* current,
                                      HInstruction* actual_user,
                                      BitVector* live_in) {
-  for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-    HInstruction* input = current->InputAt(i);
+  HInputsRef inputs = current->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    HInstruction* input = inputs[i];
     bool has_in_location = current->GetLocations()->InAt(i).IsValid();
     bool has_out_location = input->GetLocations()->Out().IsValid();
 
@@ -367,6 +368,27 @@
   return live_in->UnionIfNotIn(live_out, kill);
 }
 
+void LiveInterval::DumpWithContext(std::ostream& stream,
+                                   const CodeGenerator& codegen) const {
+  Dump(stream);
+  if (IsFixed()) {
+    stream << ", register:" << GetRegister() << "(";
+    if (IsFloatingPoint()) {
+      codegen.DumpFloatingPointRegister(stream, GetRegister());
+    } else {
+      codegen.DumpCoreRegister(stream, GetRegister());
+    }
+    stream << ")";
+  } else {
+    stream << ", spill slot:" << GetSpillSlot();
+  }
+  stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister());
+  if (GetParent()->GetDefinedBy() != nullptr) {
+    stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind();
+    stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")";
+  }
+}
+
 static int RegisterOrLowRegister(Location location) {
   return location.IsPair() ? location.low() : location.reg();
 }
@@ -430,12 +452,12 @@
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
         if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
-          for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
+          HInputsRef inputs = user->GetInputs();
+          for (size_t i = 0; i < inputs.size(); ++i) {
             if (i == input_index) {
               continue;
             }
-            HInstruction* input = user->InputAt(i);
-            Location location = input->GetLiveInterval()->GetLocationAt(
+            Location location = inputs[i]->GetLiveInterval()->GetLocationAt(
                 user->GetBlock()->GetPredecessors()[i]->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
@@ -471,10 +493,10 @@
   if (defined_by_->IsPhi()) {
     // Try to use the same register as one of the inputs.
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
-    for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
-      HInstruction* input = defined_by_->InputAt(i);
+    HInputsRef inputs = defined_by_->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
       size_t end = predecessors[i]->GetLifetimeEnd();
-      LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
+      LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
         // If the input dies at the end of the predecessor, we know its register can
         // be reused.
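LiveInterval::DumpWithContext above is purely diagnostic. A minimal usage sketch, assuming a LiveInterval* interval and a CodeGenerator& codegen are in scope, as they are in register allocator validation code (the caller shown here is hypothetical, not part of this change):

    std::ostringstream oss;                    // needs <sstream>
    interval->DumpWithContext(oss, codegen);   // interval dump plus register/spill-slot context
    LOG(ERROR) << "Unexpected interval state: " << oss.str();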
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 1fcba8b..92788fe 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -150,9 +150,7 @@
     if (GetIsEnvironment()) return false;
     if (IsSynthesized()) return false;
     Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
-    return location.IsUnallocated()
-        && (location.GetPolicy() == Location::kRequiresRegister
-            || location.GetPolicy() == Location::kRequiresFpuRegister);
+    return location.IsUnallocated() && location.RequiresRegisterKind();
   }
 
  private:
@@ -481,6 +479,10 @@
     return last_range_->GetEnd();
   }
 
+  size_t GetLength() const {
+    return GetEnd() - GetStart();
+  }
+
   size_t FirstRegisterUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -504,10 +506,18 @@
     return kNoLifetime;
   }
 
+  // Returns the lifetime position of the first register use for this live interval,
+  // including a register definition if applicable.
   size_t FirstRegisterUse() const {
     return FirstRegisterUseAfter(GetStart());
   }
 
+  // Whether the interval requires a register rather than a stack location.
+  // If needed for performance, this could be cached.
+  bool RequiresRegister() const {
+    return !HasRegister() && FirstRegisterUse() != kNoLifetime;
+  }
+
   size_t FirstUseAfter(size_t position) const {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
@@ -693,6 +703,10 @@
     stream << " is_high: " << IsHighInterval();
   }
 
+  // Same as Dump, but adds context such as the instruction defining this interval, and
+  // the register currently assigned to this interval.
+  void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const;
+
   LiveInterval* GetNextSibling() const { return next_sibling_; }
   LiveInterval* GetLastSibling() {
     LiveInterval* result = this;
@@ -797,8 +811,8 @@
   bool IsUsingInputRegister() const {
     CHECK(kIsDebugBuild) << "Function should be used only for DCHECKs";
     if (defined_by_ != nullptr && !IsSplit()) {
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
         // Find the interval that covers `defined_by_`. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
@@ -828,8 +842,8 @@
       if (locations->OutputCanOverlapWithInputs()) {
         return false;
       }
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
         // Find the interval that covers `defined_by_`. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
@@ -871,6 +885,33 @@
     range_search_start_ = first_range_;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, we return the position of that instruction
+    // as the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
  private:
   LiveInterval(ArenaAllocator* allocator,
                Primitive::Type type,
@@ -925,33 +966,6 @@
     return range;
   }
 
-  bool DefinitionRequiresRegister() const {
-    DCHECK(IsParent());
-    LocationSummary* locations = defined_by_->GetLocations();
-    Location location = locations->Out();
-    // This interval is the first interval of the instruction. If the output
-    // of the instruction requires a register, we return the position of that instruction
-    // as the first register use.
-    if (location.IsUnallocated()) {
-      if ((location.GetPolicy() == Location::kRequiresRegister)
-           || (location.GetPolicy() == Location::kSameAsFirstInput
-               && (locations->InAt(0).IsRegister()
-                   || locations->InAt(0).IsRegisterPair()
-                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-        return true;
-      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                 || (location.GetPolicy() == Location::kSameAsFirstInput
-                     && (locations->InAt(0).IsFpuRegister()
-                         || locations->InAt(0).IsFpuRegisterPair()
-                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
-        return true;
-      }
-    } else if (location.IsRegister() || location.IsRegisterPair()) {
-      return true;
-    }
-    return false;
-  }
-
   bool IsDefiningPosition(size_t position) const {
     return IsParent() && (position == GetStart());
   }
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index c67612e..b1ec99a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -67,8 +67,8 @@
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
-    for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HPhi* input = it.Current()->AsPhi();
+    for (HInstruction* raw_input : phi->GetInputs()) {
+      HPhi* input = raw_input->AsPhi();
       if (input != nullptr && input->IsDead()) {
         // Input is a dead phi. Revive it and add to the worklist. We make sure
         // that the phi was not dead initially (see definition of `initially_live`).
@@ -102,9 +102,7 @@
           }
         }
         // Remove the phi from use lists of its inputs.
-        for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-          phi->RemoveAsUserOfInput(i);
-        }
+        phi->RemoveAsUserOfAllInputs();
         // Remove the phi from environments that use it.
         for (const HUseListNode<HEnvironment*>& use : phi->GetEnvUses()) {
           HEnvironment* user = use.GetUser();
@@ -159,8 +157,7 @@
     bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
 
     // First do a simple loop over inputs and check if they are all the same.
-    for (size_t j = 0; j < phi->InputCount(); ++j) {
-      HInstruction* input = phi->InputAt(j);
+    for (HInstruction* input : phi->GetInputs()) {
       if (input == phi) {
         continue;
       } else if (candidate == nullptr) {
@@ -181,8 +178,7 @@
         DCHECK(!current->IsLoopHeaderPhi() ||
                current->GetBlock()->IsLoopPreHeaderFirstPredecessor());
 
-        for (size_t j = 0; j < current->InputCount(); ++j) {
-          HInstruction* input = current->InputAt(j);
+        for (HInstruction* input : current->GetInputs()) {
           if (input == current) {
             continue;
           } else if (input->IsPhi()) {
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 11a254e..fc8af64 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -228,7 +228,7 @@
 
 void StackMapStream::ComputeInlineInfoEncoding() {
   uint32_t method_index_max = 0;
-  uint32_t dex_pc_max = 0;
+  uint32_t dex_pc_max = DexFile::kDexNoIndex;
   uint32_t invoke_type_max = 0;
 
   uint32_t inline_info_index = 0;
@@ -236,7 +236,10 @@
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       method_index_max = std::max(method_index_max, inline_entry.method_index);
-      dex_pc_max = std::max(dex_pc_max, inline_entry.dex_pc);
+      if (inline_entry.dex_pc != DexFile::kDexNoIndex &&
+          (dex_pc_max == DexFile::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) {
+        dex_pc_max = inline_entry.dex_pc;
+      }
       invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
     }
   }
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 41f72f5..53a9795 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -108,7 +108,7 @@
   };
 
   struct InlineInfoEntry {
-    uint32_t dex_pc;
+    uint32_t dex_pc;  // DexFile::kDexNoIndex for intrinsified native methods.
     uint32_t method_index;
     InvokeType invoke_type;
     uint32_t num_dex_registers;
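The dex_pc_max change above is needed because DexFile::kDexNoIndex is the all-ones sentinel (0xFFFFFFFF), so feeding it to std::max would always win and force the widest possible dex-pc encoding; for example, std::max(5u, DexFile::kDexNoIndex) yields 0xFFFFFFFF. With the new code, dex_pc_max stays at the sentinel until a real dex pc from a non-intrinsified frame is seen.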
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
new file mode 100644
index 0000000..8aa315a
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "x86_memory_gen.h"
+#include "code_generator.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Replace instructions with memory operand forms.
+ */
+class MemoryOperandVisitor : public HGraphVisitor {
+ public:
+  MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
+      : HGraphVisitor(graph),
+        do_implicit_null_checks_(do_implicit_null_checks) {}
+
+ private:
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+    // Replace the length by the array itself, so that we can do compares to memory.
+    HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+
+    // We only want to replace an ArrayLength.
+    if (array_len == nullptr) {
+      return;
+    }
+
+    HInstruction* array = array_len->InputAt(0);
+    DCHECK_EQ(array->GetType(), Primitive::kPrimNot);
+
+    // Don't apply this optimization when the array is nullptr.
+    if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) {
+      return;
+    }
+
+    // Is there a null check that could be an implicit check?
+    if (array->IsNullCheck() && do_implicit_null_checks_) {
+      // The ArrayLength may generate the implicit null check.  Can the
+      // bounds check do so as well?
+      if (array_len->GetNextDisregardingMoves() != check) {
+        // No, it won't.  Leave as is.
+        return;
+      }
+    }
+
+    // Can we suppress the ArrayLength and generate it at the BoundsCheck?
+    if (array_len->HasOnlyOneNonEnvironmentUse()) {
+      array_len->MarkEmittedAtUseSite();
+      // We need the ArrayLength just before the BoundsCheck.
+      array_len->MoveBefore(check);
+    }
+  }
+
+  bool do_implicit_null_checks_;
+};
+
+X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
+                                                       CodeGenerator* codegen,
+                                                       OptimizingCompilerStats* stats)
+    : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
+      do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
+}
+
+void X86MemoryOperandGeneration::Run() {
+  MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
new file mode 100644
index 0000000..5f15d9f
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+class CodeGenerator;
+
+namespace x86 {
+
+class X86MemoryOperandGeneration : public HOptimization {
+ public:
+  X86MemoryOperandGeneration(HGraph* graph,
+                             CodeGenerator* codegen,
+                             OptimizingCompilerStats* stats);
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kX86MemoryOperandGenerationPassName =
+          "x86_memory_operand_generation";
+
+ private:
+  bool do_implicit_null_checks_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
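A minimal usage sketch for the new pass, assuming graph, codegen, and stats are the usual objects available wherever the x86 backend registers its architecture-specific optimizations (the registration site is not part of this change):

    x86::X86MemoryOperandGeneration memory_gen(graph, codegen, stats);
    memory_gen.Run();  // Marks eligible HArrayLength inputs of HBoundsCheck as emitted at use site.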
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 1ee1c4d..304e56b 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -50,7 +50,7 @@
 #ifdef ART_ENABLE_CODEGEN_arm
 namespace arm {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
-    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset32 offset) {
   Thumb2Assembler assembler(arena);
 
   switch (abi) {
@@ -80,7 +80,7 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 namespace arm64 {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
-    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset64 offset) {
   Arm64Assembler assembler(arena);
 
   switch (abi) {
@@ -119,7 +119,7 @@
 #ifdef ART_ENABLE_CODEGEN_mips
 namespace mips {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
-    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset32 offset) {
   MipsAssembler assembler(arena);
 
   switch (abi) {
@@ -151,7 +151,7 @@
 #ifdef ART_ENABLE_CODEGEN_mips64
 namespace mips64 {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
-    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset64 offset) {
   Mips64Assembler assembler(arena);
 
   switch (abi) {
@@ -183,7 +183,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86
 namespace x86 {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
-                                                                    ThreadOffset<4> offset) {
+                                                                    ThreadOffset32 offset) {
   X86Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in fs.
@@ -204,7 +204,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
 namespace x86_64 {
 static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
-                                                                    ThreadOffset<8> offset) {
+                                                                    ThreadOffset64 offset) {
   x86_64::X86_64Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in gs.
@@ -224,7 +224,7 @@
 
 std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
                                                                EntryPointCallingConvention abi,
-                                                               ThreadOffset<8> offset) {
+                                                               ThreadOffset64 offset) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   switch (isa) {
@@ -250,7 +250,7 @@
 
 std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
                                                                EntryPointCallingConvention abi,
-                                                               ThreadOffset<4> offset) {
+                                                               ThreadOffset32 offset) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   switch (isa) {
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 8f823f1..1a10e4c 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -27,10 +27,10 @@
 // Create code that will invoke the function held in thread local storage.
 std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
                                                                EntryPointCallingConvention abi,
-                                                               ThreadOffset<4> entry_point_offset);
+                                                               ThreadOffset32 entry_point_offset);
 std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
                                                                EntryPointCallingConvention abi,
-                                                               ThreadOffset<8> entry_point_offset);
+                                                               ThreadOffset64 entry_point_offset);
 
 }  // namespace art
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index e5f91dc..d5cd59d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -376,500 +376,6 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::ArmCore(static_cast<int>(reg));
-}
-
-static dwarf::Reg DWARFReg(SRegister reg) {
-  return dwarf::Reg::ArmFp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = kArmPointerSize;
-
-void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
-
-  // Push callee saves and link register.
-  RegList core_spill_mask = 1 << LR;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-  PushList(core_spill_mask);
-  cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
-  cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
-  if (fp_spill_mask != 0) {
-    vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
-    cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
-  }
-
-  // Increase frame to required size.
-  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
-  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
-
-  // Write out Method*.
-  StoreToOffset(kStoreWord, R0, SP, 0);
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ArmManagedRegister reg = entry_spills.at(i).AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    }
-  }
-}
-
-void ArmAssembler::RemoveFrame(size_t frame_size,
-                              const std::vector<ManagedRegister>& callee_save_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-
-  // Compute callee saves to pop and PC.
-  RegList core_spill_mask = 1 << PC;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-
-  // Decrease frame to start of callee saves.
-  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pop_values * kFramePointerSize);
-  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
-
-  if (fp_spill_mask != 0) {
-    vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
-    cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
-  }
-
-  // Pop callee saves and PC.
-  PopList(core_spill_mask);
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void ArmAssembler::IncreaseFrameSize(size_t adjust) {
-  AddConstant(SP, -adjust);
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void ArmAssembler::DecreaseFrameSize(size_t adjust) {
-  AddConstant(SP, adjust);
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
-  ArmManagedRegister src = msrc.AsArm();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCoreRegister()) {
-    CHECK_EQ(4u, size);
-    StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
-    StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
-                  SP, dest.Int32Value() + 4);
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
-  }
-}
-
-void ArmAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  ArmManagedRegister src = msrc.AsArm();
-  CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc,
-                              FrameOffset in_off, ManagedRegister mscratch) {
-  ArmManagedRegister src = msrc.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-}
-
-void ArmAssembler::CopyRef(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dst.AsCoreRegister());
-  }
-}
-
-void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset  src) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
-}
-
-void ArmAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                           Offset offs) {
-  ArmManagedRegister dst = mdest.AsArm();
-  CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
-                 base.AsArm().AsCoreRegister(), offs.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                      ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-}
-
-void ArmAssembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                       ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, dest.Int32Value());
-}
-
-static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst,
-                     Register src_register, int32_t src_offset, size_t size) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  if (dst.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dst;
-  } else if (dst.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
-  } else if (dst.IsRegisterPair()) {
-    CHECK_EQ(8u, size) << dst;
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
-    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
-  } else if (dst.IsSRegister()) {
-    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
-  } else {
-    CHECK(dst.IsDRegister()) << dst;
-    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
-  }
-}
-
-void ArmAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return EmitLoad(this, m_dst, SP, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset<4> src, size_t size) {
-  return EmitLoad(this, m_dst, TR, src.Int32Value(), size);
-}
-
-void ArmAssembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset<4> offs) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                        ThreadOffset<4> thr_offs,
-                                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 TR, thr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                SP, fr_offs.Int32Value());
-}
-
-void ArmAssembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                      FrameOffset fr_offs,
-                                      ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, fr_offs.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                            FrameOffset fr_offs,
-                                            ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
-  StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
-}
-
-void ArmAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
-}
-
-void ArmAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
-}
-
-void ArmAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
-  ArmManagedRegister dst = m_dst.AsArm();
-  ArmManagedRegister src = m_src.AsArm();
-  if (!dst.Equals(src)) {
-    if (dst.IsCoreRegister()) {
-      CHECK(src.IsCoreRegister()) << src;
-      mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
-    } else if (dst.IsDRegister()) {
-      CHECK(src.IsDRegister()) << src;
-      vmovd(dst.AsDRegister(), src.AsDRegister());
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      vmovs(dst.AsSRegister(), src.AsSRegister());
-    } else {
-      CHECK(dst.IsRegisterPair()) << dst;
-      CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second.
-      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-      } else {
-        mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
-        mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
-      }
-    }
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
-  }
-}
-
-void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
-}
-
-void ArmAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  CHECK_EQ(size, 4u);
-  LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest_base.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  Register scratch = mscratch.AsArm().AsCoreRegister();
-  LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
-  StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
-}
-
-void ArmAssembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                     SP, handle_scope_offset.Int32Value());
-      in_reg = out_reg;
-    }
-    cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-    if (!out_reg.Equals(in_reg)) {
-      it(EQ, kItElse);
-      LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
-    } else {
-      it(NE);
-    }
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-}
-
-void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  if (null_allowed) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-    it(NE);
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
-  } else {
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
-  }
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
-}
-
-void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  ArmManagedRegister out_reg = mout_reg.AsArm();
-  ArmManagedRegister in_reg = min_reg.AsArm();
-  CHECK(out_reg.IsCoreRegister()) << out_reg;
-  CHECK(in_reg.IsCoreRegister()) << in_reg;
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
-  }
-  cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
-  it(NE);
-  LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                 in_reg.AsCoreRegister(), 0, NE);
-}
-
-void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void ArmAssembler::Call(ManagedRegister mbase, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister base = mbase.AsArm();
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(base.IsCoreRegister()) << base;
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 base.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call.
-}
-
-void ArmAssembler::Call(FrameOffset base, Offset offset,
-                        ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, base.Int32Value());
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 scratch.AsCoreRegister(), offset.Int32Value());
-  blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call
-}
-
-void ArmAssembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void ArmAssembler::GetCurrentThread(ManagedRegister tr) {
-  mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
-}
-
-void ArmAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister /*scratch*/) {
-  StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
-}
-
-void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 TR, Thread::ExceptionOffset<4>().Int32Value());
-  cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-  b(slow->Entry(), NE);
-}
-
-void ArmExceptionSlowPath::Emit(Assembler* sasm) {
-  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving R0 as this call won't return.
-  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
-  // Set up call to Thread::Current()->pDeliverException.
-  __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
-  __ blx(R12);
-#undef __
-}
-
-
 static int LeadingZeros(uint32_t val) {
   uint32_t alt;
   int32_t n;
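Editorial note (not part of the patch): the removed CreateHandleScopeEntry code above emits an IT-based conditional select whose intent is summarized by the comment `out_reg = (handle == 0) ? 0 : (SP + handle_offset)`. The standalone C++ sketch below restates that computation; the function and parameter names are illustrative only and do not exist in ART.

// Editorial sketch of the handle scope entry value described in the removed
// CreateHandleScopeEntry comments; names and values here are illustrative.
#include <cstdint>
#include <iostream>

uint32_t HandleScopeEntry(uint32_t reference, uint32_t sp, uint32_t handle_scope_offset) {
  // Null references map to 0; everything else maps to the address of the
  // handle scope slot that holds the reference (SP + offset).
  return (reference == 0u) ? 0u : (sp + handle_scope_offset);
}

int main() {
  std::cout << HandleScopeEntry(0u, 0x1000u, 16u) << "\n";       // 0 for a null reference
  std::cout << HandleScopeEntry(0xabcdu, 0x1000u, 16u) << "\n";  // 0x1010 = 4112
}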
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index ffbe786..c52a5a9 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -23,12 +23,14 @@
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "offsets.h"
 
 namespace art {
@@ -671,10 +673,15 @@
   virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
   virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
 
+  virtual void vcntd(DRegister dd, DRegister dm) = 0;
+  virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0;
+
   virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0;
 
   // Branch instructions.
   virtual void b(Label* label, Condition cond = AL) = 0;
@@ -751,32 +758,7 @@
     }
   }
 
-  void LoadDImmediate(DRegister sd, double value, Condition cond = AL) {
-    if (!vmovd(sd, value, cond)) {
-      uint64_t int_value = bit_cast<uint64_t, double>(value);
-      if (int_value == bit_cast<uint64_t, double>(0.0)) {
-        // 0.0 is quite common, so we special case it by loading
-        // 2.0 in `sd` and then substracting it.
-        bool success = vmovd(sd, 2.0, cond);
-        CHECK(success);
-        vsubd(sd, sd, sd, cond);
-      } else {
-        if (sd < 16) {
-          SRegister low = static_cast<SRegister>(sd << 1);
-          SRegister high = static_cast<SRegister>(low + 1);
-          LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
-          if (High32Bits(int_value) == Low32Bits(int_value)) {
-            vmovs(high, low);
-          } else {
-            LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
-          }
-        } else {
-          LOG(FATAL) << "Unimplemented loading of double into a D register "
-                     << "that cannot be split into two S registers";
-        }
-      }
-    }
-  }
+  virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0;
 
   virtual void MarkExceptionHandler(Label* label) = 0;
   virtual void LoadFromOffset(LoadOperandType type,
@@ -902,121 +884,6 @@
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
   virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-    OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   static uint32_t ModifiedImmediate(uint32_t value);
 
   static bool IsLowRegister(Register r) {
@@ -1041,6 +908,12 @@
     // reg = -reg.
     rsb(reg, reg, ShifterOperand(0));
   }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(Register reg) {
     if (kPoisonHeapReferences) {
@@ -1094,18 +967,6 @@
   ArenaVector<Label*> tracked_labels_;
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class ArmExceptionSlowPath FINAL : public SlowPath {
- public:
-  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-  }
-  void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const ArmManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace arm
 }  // namespace art
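Editorial note (not part of the patch): the new MaybePoisonHeapReference above mirrors the existing MaybeUnpoisonHeapReference, and the surrounding code shows that poisoning is implemented as `rsb reg, reg, #0`, i.e. two's-complement negation. The sketch below illustrates that symmetry under that assumption; the C++ names are illustrative, not ART symbols.

// Editorial sketch of the poison/unpoison symmetry, assuming a poisoned
// reference is simply the negation of the original 32-bit reference.
#include <cassert>
#include <cstdint>

constexpr bool kPoisonHeapReferences = true;  // stand-in for ART's build-time switch

uint32_t MaybePoison(uint32_t ref) {
  return kPoisonHeapReferences ? static_cast<uint32_t>(-static_cast<int32_t>(ref)) : ref;
}

uint32_t MaybeUnpoison(uint32_t ref) {
  return kPoisonHeapReferences ? static_cast<uint32_t>(-static_cast<int32_t>(ref)) : ref;
}

int main() {
  uint32_t ref = 0x12345678u;
  // Negation is its own inverse, so poisoning then unpoisoning round-trips.
  assert(MaybeUnpoison(MaybePoison(ref)) == ref);
}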
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 0a227b2..b8eb60c 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1106,6 +1106,18 @@
 }
 
 
+void Arm32Assembler::vldmiad(Register, DRegister, int, Condition) {
+  LOG(FATAL) << "Unimplemented.";
+  UNREACHABLE();
+}
+
+
+void Arm32Assembler::vstmiad(Register, DRegister, int, Condition) {
+  LOG(FATAL) << "Unimplemented.";
+  UNREACHABLE();
+}
+
+
 void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
   CHECK_NE(cond, kNoCondition);
   CHECK_GT(nregs, 0);
@@ -1264,6 +1276,31 @@
   Emit(encoding);
 }
 
+void Arm32Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
+void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
 
 void Arm32Assembler::svc(uint32_t imm24) {
   CHECK(IsUint<24>(imm24)) << imm24;
@@ -1461,6 +1498,34 @@
   }
 }
 
+void Arm32Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      if (dd < 16) {
+        // Note: Depending on the particular CPU, this may cause a register
+        // forwarding hazard, negatively impacting performance.
+        SRegister low = static_cast<SRegister>(dd << 1);
+        SRegister high = static_cast<SRegister>(low + 1);
+        LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
+        if (High32Bits(int_value) == Low32Bits(int_value)) {
+          vmovs(high, low);
+        } else {
+          LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
+        }
+      } else {
+        LOG(FATAL) << "Unimplemented loading of double into a D register "
+                   << "that cannot be split into two S registers";
+      }
+    }
+  }
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetArm.
@@ -1611,12 +1676,6 @@
 }
 
 
-void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Arm32Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf57ff05f;  // dmb
   Emit(encoding | flavor);
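Editorial note (not part of the patch): Arm32Assembler::LoadDImmediate above falls back to reinterpreting the double as a 64-bit pattern and loading its low and high words into the two S registers that alias the target D register. The sketch below shows that bit-pattern split in isolation; the helper name is illustrative only.

// Editorial sketch of the double -> two 32-bit words split used above.
#include <cstdint>
#include <cstring>
#include <iostream>

uint64_t BitCastDouble(double value) {
  uint64_t bits;
  static_assert(sizeof(bits) == sizeof(value), "size mismatch");
  std::memcpy(&bits, &value, sizeof(bits));  // well-defined bit_cast replacement
  return bits;
}

int main() {
  uint64_t bits = BitCastDouble(1.5);
  uint32_t low = static_cast<uint32_t>(bits);         // would go into S(2 * d)
  uint32_t high = static_cast<uint32_t>(bits >> 32);  // would go into S(2 * d + 1)
  std::cout << std::hex << low << " " << high << "\n";  // prints "0 3ff80000" for 1.5
}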
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index bc6020e..0cb6b17 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -205,10 +205,15 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
 
   // Branch instructions.
   void b(Label* label, Condition cond = AL) OVERRIDE;
@@ -267,6 +272,7 @@
 
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
@@ -312,8 +318,6 @@
   void Emit(int32_t value);
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
   void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index e570e22..b214062 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -899,4 +899,43 @@
   T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
 }
 
+TEST_F(AssemblerArm32Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  GetAssembler()->vcntd(arm::D0, arm::D1);
+  GetAssembler()->vcntd(arm::D19, arm::D20);
+  GetAssembler()->vcntd(arm::D0, arm::D9);
+  GetAssembler()->vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerArm32Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true);
+  GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false);
+  GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false);
+  GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
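Editorial note (not part of the patch): the vcnt/vpaddl encodings and the test comments above split a D register number into a single top bit (the D or M field) and four low bits (the Vd or Vm field), exactly as the `(dd >> 4)` and `(dd & 0xf)` terms in the emitters show. The sketch below restates that split; the struct and function names are illustrative only.

// Editorial sketch of the D:Vd / M:Vm register-number split used above.
#include <cstdint>
#include <iostream>

struct DRegFields {
  uint32_t top_bit;   // 0 for D0..D15, 1 for D16..D31 (the D or M bit)
  uint32_t low_bits;  // low four bits of the register number (Vd or Vm)
};

DRegFields SplitDRegister(uint32_t d_reg_number) {
  return DRegFields{(d_reg_number >> 4) & 1u, d_reg_number & 0xfu};
}

int main() {
  DRegFields d19 = SplitDRegister(19);
  std::cout << d19.top_bit << " " << d19.low_bits << "\n";  // prints "1 3" for D19
}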
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 546dd65..ebdfc98 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1917,7 +1917,7 @@
 
     case kLongOrFPLiteral1KiB:
       return 4u;
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
       return 10u;
     case kLongOrFPLiteralFar:
       return 14u;
@@ -1989,7 +1989,7 @@
       break;
     case kLiteral1MiB:
     case kLiteral64KiB:
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
     case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 4;        // One extra 32-bit MOV.
@@ -2105,10 +2105,10 @@
       if (IsUint<10>(GetOffset(current_code_size))) {
         break;
       }
-      current_code_size += IncreaseSize(kLongOrFPLiteral256KiB);
+      current_code_size += IncreaseSize(kLongOrFPLiteral64KiB);
       FALLTHROUGH_INTENDED;
-    case kLongOrFPLiteral256KiB:
-      if (IsUint<18>(GetOffset(current_code_size))) {
+    case kLongOrFPLiteral64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
         break;
       }
       current_code_size += IncreaseSize(kLongOrFPLiteralFar);
@@ -2269,11 +2269,10 @@
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
-    case kLongOrFPLiteral256KiB: {
-      int32_t offset = GetOffset(code_size);
-      int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff);
+    case kLongOrFPLiteral64KiB: {
+      int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
-      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff);    // DCHECKs type_.
+      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u);    // DCHECKs type_.
       buffer->Store<int16_t>(location_, mov_encoding >> 16);
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
       buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
@@ -2326,7 +2325,7 @@
   }
 
   Register rn = ad.GetRegister();
-  if (IsHighRegister(rn) && rn != SP && rn != PC) {
+  if (IsHighRegister(rn) && (byte || half || (rn != SP && rn != PC))) {
     must_be_32bit = true;
   }
 
@@ -2338,24 +2337,24 @@
     // Immediate offset
     int32_t offset = ad.GetOffset();
 
-    // The 16 bit SP relative instruction can only have a 10 bit offset.
-    if (rn == SP && offset >= (1 << 10)) {
-      must_be_32bit = true;
-    }
-
     if (byte) {
       // 5 bit offset, no shift.
-      if (offset >= (1 << 5)) {
+      if ((offset & ~0x1f) != 0) {
         must_be_32bit = true;
       }
     } else if (half) {
-      // 6 bit offset, shifted by 1.
-      if (offset >= (1 << 6)) {
+      // 5 bit offset, shifted by 1.
+      if ((offset & ~(0x1f << 1)) != 0) {
+        must_be_32bit = true;
+      }
+    } else if (rn == SP || rn == PC) {
+      // The 16 bit SP/PC relative instruction can only have an (imm8 << 2) offset.
+      if ((offset & ~(0xff << 2)) != 0) {
         must_be_32bit = true;
       }
     } else {
-      // 7 bit offset, shifted by 2.
-      if (offset >= (1 << 7)) {
+      // 5 bit offset, shifted by 2.
+      if ((offset & ~(0x1f << 2)) != 0) {
         must_be_32bit = true;
       }
     }
@@ -2371,7 +2370,7 @@
     } else {
       // 16 bit thumb1.
       uint8_t opA = 0;
-      bool sp_relative = false;
+      bool sp_or_pc_relative = false;
 
       if (byte) {
         opA = 7U /* 0b0111 */;
@@ -2380,7 +2379,10 @@
       } else {
         if (rn == SP) {
           opA = 9U /* 0b1001 */;
-          sp_relative = true;
+          sp_or_pc_relative = true;
+        } else if (rn == PC) {
+          opA = 4U /* 0b0100 */;
+          sp_or_pc_relative = true;
         } else {
           opA = 6U /* 0b0110 */;
         }
@@ -2389,7 +2391,7 @@
           (load ? B11 : 0);
 
       CHECK_GE(offset, 0);
-      if (sp_relative) {
+      if (sp_or_pc_relative) {
         // SP relative, 10 bit offset.
         CHECK_LT(offset, (1 << 10));
         CHECK_ALIGNED(offset, 4);
@@ -2457,6 +2459,9 @@
         } else if (!byte) {
           encoding |= B22;
         }
+        if (load && is_signed && (byte || half)) {
+          encoding |= B24;
+        }
         Emit32(encoding);
       } else {
         // 16 bit register offset.
@@ -3015,9 +3020,49 @@
 }
 
 
+void Thumb2Assembler::vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond) {
+  int32_t rest = B23;
+  EmitVLdmOrStm(rest,
+                static_cast<uint32_t>(reg),
+                nregs,
+                base_reg,
+                /*is_load*/ true,
+                /*dbl*/ true,
+                cond);
+}
+
+
+void Thumb2Assembler::vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond) {
+  int32_t rest = B23;
+  EmitVLdmOrStm(rest,
+                static_cast<uint32_t>(reg),
+                nregs,
+                base_reg,
+                /*is_load*/ false,
+                /*dbl*/ true,
+                cond);
+}
+
+
 void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
+  int32_t rest = B21 | (push ? B24 : B23);
+  EmitVLdmOrStm(rest, reg, nregs, SP, /*is_load*/ !push, dbl, cond);
+}
+
+
+void Thumb2Assembler::EmitVLdmOrStm(int32_t rest,
+                                    uint32_t reg,
+                                    int nregs,
+                                    Register rn,
+                                    bool is_load,
+                                    bool dbl,
+                                    Condition cond) {
   CheckCondition(cond);
 
+  DCHECK_GT(nregs, 0);
+  DCHECK_LE(reg + nregs, 32u);
+  DCHECK(!dbl || (nregs <= 16));
+
   uint32_t D;
   uint32_t Vd;
   if (dbl) {
@@ -3029,14 +3074,17 @@
     D = reg & 1;
     Vd = (reg >> 1) & 15U /* 0b1111 */;
   }
-  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
-                    B11 | B9 |
-        (dbl ? B8 : 0) |
-        (push ? B24 : (B23 | B20)) |
-        14U /* 0b1110 */ << 28 |
-        nregs << (dbl ? 1 : 0) |
-        D << 22 |
-        Vd << 12;
+
+  int32_t encoding = rest |
+                     14U /* 0b1110 */ << 28 |
+                     B27 | B26 | B11 | B9 |
+                     (is_load ? B20 : 0) |
+                     static_cast<int16_t>(rn) << 16 |
+                     D << 22 |
+                     Vd << 12 |
+                     (dbl ? B8 : 0) |
+                     nregs << (dbl ? 1 : 0);
+
   Emit32(encoding);
 }
 
@@ -3117,6 +3165,30 @@
   Emit32(encoding);
 }
 
+void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
+
+void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
 
 void Thumb2Assembler::svc(uint32_t imm8) {
   CHECK(IsUint<8>(imm8)) << imm8;
@@ -3574,6 +3646,24 @@
   }
 }
 
+void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      Literal* literal = literal64_dedupe_map_.GetOrCreate(
+          int_value,
+          [this, int_value]() { return NewLiteral<uint64_t>(int_value); });
+      LoadLiteral(dd, literal);
+    }
+  }
+}
+
 int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
   switch (type) {
     case kLoadSignedByte:
@@ -3816,12 +3906,6 @@
 }
 
 
-void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-  dmb(SY);
-}
-
-
 void Thumb2Assembler::dmb(DmbOptions flavor) {
   int32_t encoding = 0xf3bf8f50;  // dmb in T1 encoding.
   Emit32(encoding | flavor);
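Editorial note (not part of the patch): Thumb2Assembler::LoadDImmediate above deduplicates 64-bit literal pool entries through `literal64_dedupe_map_.GetOrCreate(...)`, so identical bit patterns share one pool slot. The sketch below shows the same idea with `std::map` standing in for ART's ArenaSafeMap; the class and method names are illustrative only.

// Editorial sketch of 64-bit literal deduplication.
#include <cstdint>
#include <iostream>
#include <map>

class LiteralPool {
 public:
  // Returns the pool index for `value`, creating an entry only on first use.
  size_t GetOrCreate(uint64_t value) {
    auto it = dedupe_map_.find(value);
    if (it != dedupe_map_.end()) {
      return it->second;
    }
    size_t index = next_index_++;
    dedupe_map_.emplace(value, index);
    return index;
  }

 private:
  std::map<uint64_t, size_t> dedupe_map_;
  size_t next_index_ = 0;
};

int main() {
  LiteralPool pool;
  std::cout << pool.GetOrCreate(0x3ff0000000000000ull) << "\n";  // 0 (new entry)
  std::cout << pool.GetOrCreate(0x4000000000000000ull) << "\n";  // 1 (new entry)
  std::cout << pool.GetOrCreate(0x3ff0000000000000ull) << "\n";  // 0 (deduplicated)
}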
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index ce310a4..13f3bec 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -43,6 +43,7 @@
         fixups_(arena->Adapter(kArenaAllocAssembler)),
         fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
         literals_(arena->Adapter(kArenaAllocAssembler)),
+        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
         jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0u),
         last_old_position_(0u),
@@ -250,10 +251,15 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
 
   // Branch instructions.
   void b(Label* label, Condition cond = AL);
@@ -316,6 +322,7 @@
 
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
@@ -363,8 +370,6 @@
   void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
   void Bind(Label* label) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   // Force the assembler to generate 32 bit instructions.
   void Force32Bit() {
     force_32bit_ = true;
@@ -461,8 +466,8 @@
       // Load long or FP literal variants.
       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
       kLongOrFPLiteral1KiB,
-      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
-      kLongOrFPLiteral256KiB,
+      // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
+      kLongOrFPLiteral64KiB,
       // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
       kLongOrFPLiteralFar,
     };
@@ -497,7 +502,7 @@
     // Load wide literal.
     static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                  Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
       return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
@@ -507,7 +512,7 @@
     // Load FP single literal.
     static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                    AL, kLoadFPLiteralSingle, size, location);
@@ -516,7 +521,7 @@
     // Load FP double literal.
     static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                    AL, kLoadFPLiteralDouble, size, location);
@@ -745,6 +750,14 @@
                   SRegister sn,
                   SRegister sm);
 
+  void EmitVLdmOrStm(int32_t rest,
+                     uint32_t reg,
+                     int nregs,
+                     Register rn,
+                     bool is_load,
+                     bool dbl,
+                     Condition cond);
+
   void EmitVFPddd(Condition cond,
                   int32_t opcode,
                   DRegister dd,
@@ -867,6 +880,9 @@
   // without invalidating pointers and references to existing elements.
   ArenaDeque<Literal> literals_;
 
+  // Deduplication map for 64-bit literals, used for LoadDImmediate().
+  ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;
+
   // Jump table list.
   ArenaDeque<JumpTable> jump_tables_;
 
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index b5cafcb..d0799d6 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -279,6 +279,148 @@
   DriverStr(expected, "smull");
 }
 
+TEST_F(AssemblerThumb2Test, LoadByteFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedByte;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 31);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 32);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4095);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "ldrb r0, [r7, #0]\n"
+      "ldrb r1, [r7, #31]\n"
+      "ldrb.w r2, [r7, #32]\n"
+      "ldrb.w r3, [r7, #4095]\n"
+      "ldrb.w r4, [sp, #0]\n";
+  DriverStr(expected, "LoadByteFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreByteToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreByte;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 31);
+  __ StoreToOffset(type, arm::R2, arm::R7, 32);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4095);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+
+  const char* expected =
+      "strb r0, [r7, #0]\n"
+      "strb r1, [r7, #31]\n"
+      "strb.w r2, [r7, #32]\n"
+      "strb.w r3, [r7, #4095]\n"
+      "strb.w r4, [sp, #0]\n";
+  DriverStr(expected, "StoreByteToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadHalfFromThumbOffset) {
+  arm::LoadOperandType type = arm::kLoadUnsignedHalfword;
+
+  __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+  __ LoadFromOffset(type, arm::R1, arm::R7, 62);
+  __ LoadFromOffset(type, arm::R2, arm::R7, 64);
+  __ LoadFromOffset(type, arm::R3, arm::R7, 4094);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "ldrh r0, [r7, #0]\n"
+      "ldrh r1, [r7, #62]\n"
+      "ldrh.w r2, [r7, #64]\n"
+      "ldrh.w r3, [r7, #4094]\n"
+      "ldrh.w r4, [sp, #0]\n"
+      "ldrh.w r5, [r7, #1]\n";
+  DriverStr(expected, "LoadHalfFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreHalfToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreHalfword;
+
+  __ StoreToOffset(type, arm::R0, arm::R7, 0);
+  __ StoreToOffset(type, arm::R1, arm::R7, 62);
+  __ StoreToOffset(type, arm::R2, arm::R7, 64);
+  __ StoreToOffset(type, arm::R3, arm::R7, 4094);
+  __ StoreToOffset(type, arm::R4, arm::SP, 0);
+  __ StoreToOffset(type, arm::R5, arm::R7, 1);  // Unaligned
+
+  const char* expected =
+      "strh r0, [r7, #0]\n"
+      "strh r1, [r7, #62]\n"
+      "strh.w r2, [r7, #64]\n"
+      "strh.w r3, [r7, #4094]\n"
+      "strh.w r4, [sp, #0]\n"
+      "strh.w r5, [r7, #1]\n";
+  DriverStr(expected, "StoreHalfToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromSpPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::SP, 0);
+  __ LoadFromOffset(type, arm::R1, arm::SP, 124);
+  __ LoadFromOffset(type, arm::R2, arm::SP, 128);
+  __ LoadFromOffset(type, arm::R3, arm::SP, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::SP, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::SP, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [sp, #0]\n"
+      "ldr r1, [sp, #124]\n"
+      "ldr r2, [sp, #128]\n"
+      "ldr r3, [sp, #1020]\n"
+      "ldr.w r4, [sp, #1024]\n"
+      "ldr.w r5, [sp, #4092]\n"
+      "ldr.w r6, [sp, #1]\n";
+  DriverStr(expected, "LoadWordFromSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToSpPlusOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+
+  __ StoreToOffset(type, arm::R0, arm::SP, 0);
+  __ StoreToOffset(type, arm::R1, arm::SP, 124);
+  __ StoreToOffset(type, arm::R2, arm::SP, 128);
+  __ StoreToOffset(type, arm::R3, arm::SP, 1020);
+  __ StoreToOffset(type, arm::R4, arm::SP, 1024);
+  __ StoreToOffset(type, arm::R5, arm::SP, 4092);
+  __ StoreToOffset(type, arm::R6, arm::SP, 1);  // Unaligned
+
+  const char* expected =
+      "str r0, [sp, #0]\n"
+      "str r1, [sp, #124]\n"
+      "str r2, [sp, #128]\n"
+      "str r3, [sp, #1020]\n"
+      "str.w r4, [sp, #1024]\n"
+      "str.w r5, [sp, #4092]\n"
+      "str.w r6, [sp, #1]\n";
+  DriverStr(expected, "StoreWordToSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromPcPlusOffset) {
+  arm::LoadOperandType type = arm::kLoadWord;
+
+  __ LoadFromOffset(type, arm::R0, arm::PC, 0);
+  __ LoadFromOffset(type, arm::R1, arm::PC, 124);
+  __ LoadFromOffset(type, arm::R2, arm::PC, 128);
+  __ LoadFromOffset(type, arm::R3, arm::PC, 1020);
+  __ LoadFromOffset(type, arm::R4, arm::PC, 1024);
+  __ LoadFromOffset(type, arm::R5, arm::PC, 4092);
+  __ LoadFromOffset(type, arm::R6, arm::PC, 1);  // Unaligned
+
+  const char* expected =
+      "ldr r0, [pc, #0]\n"
+      "ldr r1, [pc, #124]\n"
+      "ldr r2, [pc, #128]\n"
+      "ldr r3, [pc, #1020]\n"
+      "ldr.w r4, [pc, #1024]\n"
+      "ldr.w r5, [pc, #4092]\n"
+      "ldr.w r6, [pc, #1]\n";
+  DriverStr(expected, "LoadWordFromPcPlusOffset");
+}
+
 TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
   arm::StoreOperandType type = arm::kStoreWord;
   int32_t offset = 4092;
@@ -869,10 +1011,11 @@
   }
 
   std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x408 - 0x4 - 4)\n"
       "1:\n"
       "add ip, pc\n"
-      "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
+      "ldrd r1, r3, [ip, #0]\n" +
       RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
       ".align 2, 0\n"
       "2:\n"
@@ -884,48 +1027,78 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
-TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) {
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) {
   // The literal size must match but the type doesn't, so use an int32_t rather than float.
   arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
   __ LoadLiteral(arm::S3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 3u;
-  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
-    __ ldr(arm::R0, arm::Address(arm::R0));
-  }
-
-  std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
-      "1:\n"
-      "add ip, pc\n"
-      "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
-      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
-      ".align 2, 0\n"
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralSingleMax256KiB");
-
-  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
-            __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) {
-  // The literal size must match but the type doesn't, so use an int64_t rather than double.
-  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
-  __ LoadLiteral(arm::D3, literal);
-  Label label;
-  __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 3u;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #(0x40000 & 0xffff)\n"
+      "movw ip, #(0x10004 - 0x4 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) {
+  // The literal size must match but the type doesn't, so use an int32_t rather than float.
+  arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
+  __ ldr(arm::R0, arm::Address(arm::R0));
+  __ LoadLiteral(arm::S3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 4u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      "ldr r0, [r0]\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x10004 - 0x6 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) {
+  // The literal size must match but the type doesn't, so use an int64_t rather than double.
+  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
+  __ LoadLiteral(arm::D3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #(0x40000 >> 16)\n"
+      "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
@@ -934,7 +1107,7 @@
       "2:\n"
       ".word 0x87654321\n"
       ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB");
+  DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB");
 
   EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
             __ GetAdjustedPosition(label.Position()));
@@ -946,16 +1119,16 @@
   __ LoadLiteral(arm::D3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n"
+      "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n"
+      "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
@@ -1380,4 +1553,104 @@
   DriverStr(expected, "revsh");
 }
 
+TEST_F(AssemblerThumb2Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  __ vcntd(arm::D0, arm::D1);
+  __ vcntd(arm::D19, arm::D20);
+  __ vcntd(arm::D0, arm::D9);
+  __ vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerThumb2Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  __ vpaddld(arm::D0, arm::D0, 8, true);
+  __ vpaddld(arm::D20, arm::D20, 8, false);
+  __ vpaddld(arm::D0, arm::D20, 16, false);
+  __ vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
+TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) {
+  arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2);
+
+  __ ldrsb(arm::R2, mem_address);
+  __ ldrb(arm::R2, mem_address);
+  __ ldrsh(arm::R2, mem_address);
+  __ ldrh(arm::R2, mem_address);
+  __ ldr(arm::R2, mem_address);
+
+  std::string expected =
+      "ldrsb r2, [r0, r1, LSL #2]\n"
+      "ldrb r2, [r0, r1, LSL #2]\n"
+      "ldrsh r2, [r0, r1, LSL #2]\n"
+      "ldrh r2, [r0, r1, LSL #2]\n"
+      "ldr r2, [r0, r1, LSL #2]\n";
+
+  DriverStr(expected, "LoadFromShiftedRegOffset");
+}
+
+TEST_F(AssemblerThumb2Test, VStmLdmPushPop) {
+  // Different D register numbers are used here to test register encoding.
+  // The source register number is encoded as M:Vm and the destination register number as D:Vd.
+  // For source and destination registers in the range D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers in the range D16..D31, the M bit and D bit should be 1.
+  // Both single-precision (S) and double-precision (D) register lists are tested.
+  __ vstmiad(arm::R0, arm::D0, 4);
+  __ vldmiad(arm::R1, arm::D9, 5);
+  __ vpopd(arm::D0, 4);
+  __ vpushd(arm::D9, 5);
+  __ vpops(arm::S0, 4);
+  __ vpushs(arm::S9, 5);
+  __ vpushs(arm::S16, 5);
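+  // Boundary cases: maximum-length register lists, lists crossing into D16..D31,
+  // and single-register lists.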
+  __ vpushd(arm::D0, 16);
+  __ vpushd(arm::D1, 15);
+  __ vpushd(arm::D8, 16);
+  __ vpushd(arm::D31, 1);
+  __ vpushs(arm::S0, 32);
+  __ vpushs(arm::S1, 31);
+  __ vpushs(arm::S16, 16);
+  __ vpushs(arm::S31, 1);
+
+  std::string expected =
+      "vstmia r0, {d0 - d3}\n"
+      "vldmia r1, {d9 - d13}\n"
+      "vpop {d0 - d3}\n"
+      "vpush {d9 - d13}\n"
+      "vpop {s0 - s3}\n"
+      "vpush {s9 - s13}\n"
+      "vpush {s16 - s20}\n"
+      "vpush {d0 - d15}\n"
+      "vpush {d1 - d15}\n"
+      "vpush {d8 - d23}\n"
+      "vpush {d31}\n"
+      "vpush {s0 - s31}\n"
+      "vpush {s1 - s31}\n"
+      "vpush {s16 - s31}\n"
+      "vpush {s31}\n";
+
+  DriverStr(expected, "VStmLdmPushPop");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
new file mode 100644
index 0000000..af5ebb4
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -0,0 +1,618 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm.h"
+
+#include <algorithm>
+
+#include "assembler_arm32.h"
+#include "assembler_thumb2.h"
+#include "base/arena_allocator.h"
+#include "base/bit_utils.h"
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+
+namespace art {
+namespace arm {
+
+constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class ArmExceptionSlowPath FINAL : public SlowPath {
+ public:
+  ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {
+  }
+  void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const ArmManagedRegister scratch_;
+  const size_t stack_adjust_;
+};
+
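+// Select the concrete backend assembler (ARM32 or Thumb2) for the requested instruction set.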
+ArmJNIMacroAssembler::ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      asm_.reset(new (arena) Arm32Assembler(arena));
+      break;
+
+    case kThumb2:
+      asm_.reset(new (arena) Thumb2Assembler(arena));
+      break;
+
+    default:
+      LOG(FATAL) << isa;
+      UNREACHABLE();
+  }
+}
+
+ArmJNIMacroAssembler::~ArmJNIMacroAssembler() {
+}
+
+size_t ArmJNIMacroAssembler::CodeSize() const {
+  return asm_->CodeSize();
+}
+
+DebugFrameOpCodeWriterForAssembler& ArmJNIMacroAssembler::cfi() {
+  return asm_->cfi();
+}
+
+void ArmJNIMacroAssembler::FinalizeCode() {
+  asm_->FinalizeCode();
+}
+
+void ArmJNIMacroAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  asm_->FinalizeInstructions(region);
+}
+
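+// Helpers mapping ARM core and floating-point registers to DWARF register numbers for CFI.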
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+  return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
+#define __ asm_->
+
+void ArmJNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> callee_save_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
+
+  // Push callee saves and link register.
+  RegList core_spill_mask = 1 << LR;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+  __ PushList(core_spill_mask);
+  cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+  cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
+  if (fp_spill_mask != 0) {
+    __ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
+    cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
+  }
+
+  // Increase frame to required size.
+  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
+  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
+
+  // Write out Method*.
+  __ StoreToOffset(kStoreWord, R0, SP, 0);
+
+  // Write out entry spills.
+  int32_t offset = frame_size + kFramePointerSize;
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ArmManagedRegister reg = entry_spills.at(i).AsArm();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsCoreRegister()) {
+      __ StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsSRegister()) {
+      __ StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      __ StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    }
+  }
+}
+
+void ArmJNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> callee_save_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+
+  // Compute callee saves to pop and PC.
+  RegList core_spill_mask = 1 << PC;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+    } else {
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+    }
+  }
+
+  // Decrease frame to start of callee saves.
+  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GT(frame_size, pop_values * kFramePointerSize);
+  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
+
+  if (fp_spill_mask != 0) {
+    __ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+    cfi().RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
+  }
+
+  // Pop callee saves and PC.
+  __ PopList(core_spill_mask);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
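+// Grow the frame by lowering SP and record the matching CFA adjustment.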
+void ArmJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  __ AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(ArmAssembler* assembler, size_t adjust) {
+  assembler->AddConstant(SP, adjust);
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void ArmJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(asm_.get(), adjust);
+}
+
+void ArmJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
+  ArmManagedRegister src = msrc.AsArm();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCoreRegister()) {
+    CHECK_EQ(4u, size);
+    __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
+    __ StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), SP, dest.Int32Value() + 4);
+  } else if (src.IsSRegister()) {
+    __ StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    __ StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
+  }
+}
+
+void ArmJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  ArmManagedRegister src = msrc.AsArm();
+  CHECK(src.IsCoreRegister()) << src;
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+}
+
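+// Store src at dest and copy the word at in_off into the adjacent slot at dest + 4.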
+void ArmJNIMacroAssembler::StoreSpanning(FrameOffset dest,
+                                         ManagedRegister msrc,
+                                         FrameOffset in_off,
+                                         ManagedRegister mscratch) {
+  ArmManagedRegister src = msrc.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + sizeof(uint32_t));
+}
+
+void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                   ManagedRegister mbase,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offs.Int32Value());
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dst.AsCoreRegister());
+  }
+}
+
+void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                      ManagedRegister mbase,
+                                      Offset offs) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister dst = mdest.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord,
+                    dst.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                 uint32_t imm,
+                                                 ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadImmediate(scratch.AsCoreRegister(), imm);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+}
+
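+// Shared load helper: fills a core register, register pair, S register or D register from
+// [src_register + src_offset] according to the destination register kind.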
+static void EmitLoad(ArmAssembler* assembler,
+                     ManagedRegister m_dst,
+                     Register src_register,
+                     int32_t src_offset,
+                     size_t size) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsCoreRegister()) {
+    CHECK_EQ(4u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
+  } else if (dst.IsRegisterPair()) {
+    CHECK_EQ(8u, size) << dst;
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
+    assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
+  } else if (dst.IsSRegister()) {
+    assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset);
+  } else {
+    CHECK(dst.IsDRegister()) << dst;
+    assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
+  }
+}
+
+void ArmJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, SP, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
+  EmitLoad(asm_.get(), m_dst, TR, src.Int32Value(), size);
+}
+
+void ArmJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  CHECK(dst.IsCoreRegister()) << dst;
+  __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
+}
+
+void ArmJNIMacroAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
+}
+
+void ArmJNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) {
+  ArmManagedRegister dst = m_dst.AsArm();
+  ArmManagedRegister src = m_src.AsArm();
+  if (!dst.Equals(src)) {
+    if (dst.IsCoreRegister()) {
+      CHECK(src.IsCoreRegister()) << src;
+      __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
+    } else if (dst.IsDRegister()) {
+      CHECK(src.IsDRegister()) << src;
+      __ vmovd(dst.AsDRegister(), src.AsDRegister());
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      __ vmovs(dst.AsSRegister(), src.AsSRegister());
+    } else {
+      CHECK(dst.IsRegisterPair()) << dst;
+      CHECK(src.IsRegisterPair()) << src;
+      // Ensure that the first move doesn't clobber the input of the second.
+      if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+      } else {
+        __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
+        __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
+      }
+    }
+  }
+}
+
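+// Copy 4 or 8 bytes between frame slots through the core scratch register; an 8-byte copy
+// is performed as two word-sized load/store pairs.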
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
+    __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+  }
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset dest,
+                                ManagedRegister src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  __ LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
+  __ StoreToOffset(kStoreWord,
+                   scratch,
+                   dest_base.AsArm().AsCoreRegister(),
+                   dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                FrameOffset /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*mscratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  __ LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  __ StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value());
+}
+
+void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                Offset /*dest_offset*/,
+                                FrameOffset /*src*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+      in_reg = out_reg;
+    }
+    __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+    if (!out_reg.Equals(in_reg)) {
+      __ it(EQ, kItElse);
+      __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    } else {
+      __ it(NE);
+    }
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+}
+
+void ArmJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  if (null_allowed) {
+    __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+    __ it(NE);
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
+  } else {
+    __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
+  }
+  __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
+}
+
+void ArmJNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  ArmManagedRegister out_reg = mout_reg.AsArm();
+  ArmManagedRegister in_reg = min_reg.AsArm();
+  CHECK(out_reg.IsCoreRegister()) << out_reg;
+  CHECK(in_reg.IsCoreRegister()) << in_reg;
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
+  }
+  __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+  __ it(NE);
+  __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0, NE);
+}
+
+void ArmJNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void ArmJNIMacroAssembler::Call(ManagedRegister mbase,
+                                Offset offset,
+                                ManagedRegister mscratch) {
+  ArmManagedRegister base = mbase.AsArm();
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    base.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call.
+}
+
+void ArmJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value());
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    scratch.AsCoreRegister(),
+                    offset.Int32Value());
+  __ blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call
+}
+
+void ArmJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                          ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
+}
+
+void ArmJNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /*scratch*/) {
+  __ StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL);
+}
+
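+// Poll for a pending exception: load Thread::Current()->exception_ into the scratch register,
+// compare it against null and branch to a slow path that delivers the exception if it is set.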
+void ArmJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+  ArmManagedRegister scratch = mscratch.AsArm();
+  ArmExceptionSlowPath* slow = new (__ GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ LoadFromOffset(kLoadWord,
+                    scratch.AsCoreRegister(),
+                    TR,
+                    Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
+  __ cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+  __ b(slow->Entry(), NE);
+}
+
+#undef __
+
+void ArmExceptionSlowPath::Emit(Assembler* sasm) {
+  ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving R0 as this call won't return.
+  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
+  // Set up call to Thread::Current()->pDeliverException.
+  __ LoadFromOffset(kLoadWord,
+                    R12,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value());
+  __ blx(R12);
+#undef __
+}
+
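+// Emit a full data memory barrier (dmb sy). The scratch register must be R12 but is not
+// otherwise used.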
+void ArmJNIMacroAssembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+  asm_->dmb(SY);
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h
new file mode 100644
index 0000000..4471906
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
+
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+namespace art {
+namespace arm {
+
+class ArmAssembler;
+
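+// JNI macro assembler for 32-bit ARM. Instruction emission is delegated to an underlying
+// Arm32Assembler or Thumb2Assembler selected at construction time.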
+class ArmJNIMacroAssembler : public JNIMacroAssembler<PointerSize::k32> {
+ public:
+  ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa);
+  virtual ~ArmJNIMacroAssembler();
+
+  size_t CodeSize() const OVERRIDE;
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE;
+  void FinalizeCode() OVERRIDE;
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE;
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+    OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+ private:
+  std::unique_ptr<ArmAssembler> asm_;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 5b84058..276db44 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -85,34 +85,34 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class ArmManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds);
   }
 
-  SRegister AsOverlappingDRegisterLow() const {
+  constexpr SRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2);
   }
 
-  SRegister AsOverlappingDRegisterHigh() const {
+  constexpr SRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2 + 1);
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     Register reg_low = AsRegisterPairLow();
     if (reg_low == R1) {
@@ -122,50 +122,50 @@
     }
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows using AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows using AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps SRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfPairRegIds);
   }
 
-  bool IsSameType(ArmManagedRegister test) const {
+  constexpr bool IsSameType(ArmManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsCoreRegister() && test.IsCoreRegister()) ||
@@ -182,29 +182,29 @@
 
   void Print(std::ostream& os) const;
 
-  static ArmManagedRegister FromCoreRegister(Register r) {
+  static constexpr ArmManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static ArmManagedRegister FromSRegister(SRegister r) {
+  static constexpr ArmManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static ArmManagedRegister FromDRegister(DRegister r) {
+  static constexpr ArmManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds));
   }
 
-  static ArmManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr ArmManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds +
                           kNumberOfSRegIds + kNumberOfDRegIds));
   }
 
   // Return a RegisterPair consisting of Register r_low and r_low + 1.
-  static ArmManagedRegister FromCoreRegisterPair(Register r_low) {
+  static constexpr ArmManagedRegister FromCoreRegisterPair(Register r_low) {
     if (r_low != R1) {  // not the dalvik special case
       CHECK_NE(r_low, kNoRegister);
       CHECK_EQ(0, (r_low % 2));
@@ -217,7 +217,7 @@
   }
 
   // Return a DRegister overlapping SRegister r_low and r_low + 1.
-  static ArmManagedRegister FromSRegisterPair(SRegister r_low) {
+  static constexpr ArmManagedRegister FromSRegisterPair(SRegister r_low) {
     CHECK_NE(r_low, kNoSRegister);
     CHECK_EQ(0, (r_low % 2));
     const int r = r_low / 2;
@@ -226,7 +226,7 @@
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
@@ -251,9 +251,9 @@
 
   friend class ManagedRegister;
 
-  explicit ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static ArmManagedRegister FromRegId(int reg_id) {
+  static constexpr ArmManagedRegister FromRegId(int reg_id) {
     ArmManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -264,7 +264,7 @@
 
 }  // namespace arm
 
-inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
+constexpr inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
   arm::ArmManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index eb5112b..f91bcfa 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -20,7 +20,7 @@
 #include "offsets.h"
 #include "thread.h"
 
-using namespace vixl;  // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 namespace art {
 namespace arm64 {
@@ -28,627 +28,71 @@
 #ifdef ___
 #error "ARM64 Assembler macro already defined."
 #else
-#define ___   vixl_masm_->
+#define ___   vixl_masm_.
 #endif
 
 void Arm64Assembler::FinalizeCode() {
-  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
-    EmitExceptionPoll(exception.get());
-  }
   ___ FinalizeCode();
 }
 
 size_t Arm64Assembler::CodeSize() const {
-  return vixl_masm_->BufferCapacity() - vixl_masm_->RemainingBufferSpace();
+  return vixl_masm_.GetSizeOfCodeGenerated();
 }
 
 const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
-  return vixl_masm_->GetStartAddress<uint8_t*>();
+  return vixl_masm_.GetStartAddress<uint8_t*>();
 }
 
 void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
   // Copy the instructions from the buffer.
-  MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize());
+  MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
   region.CopyFrom(0, from);
 }
 
-void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
-}
-
-void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR, SP, offset.Int32Value());
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, -adjust);
-  cfi().AdjustCFAOffset(adjust);
-}
-
-// See Arm64 PCS Section 5.2.2.1.
-void Arm64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, adjust);
-  cfi().AdjustCFAOffset(-adjust);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
-  AddConstant(rd, rd, value, cond);
-}
-
-void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value,
-                                 Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    // VIXL macro-assembler handles all variants.
-    ___ Add(reg_x(rd), reg_x(rn), value);
-  } else {
-    // temp = rd + value
-    // rd = cond ? temp : rn
-    vixl::UseScratchRegisterScope temps(vixl_masm_);
-    temps.Exclude(reg_x(rd), reg_x(rn));
-    vixl::Register temp = temps.AcquireX();
-    ___ Add(temp, reg_x(rn), value);
-    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
-  }
-}
-
-void Arm64Assembler::StoreWToOffset(StoreOperandType type, WRegister source,
-                                    XRegister base, int32_t offset) {
-  switch (type) {
-    case kStoreByte:
-      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreHalfword:
-      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    case kStoreWord:
-      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-void Arm64Assembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
-  CHECK_NE(source, SP);
-  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
-  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsWRegister()) {
-    CHECK_EQ(4u, size);
-    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
-  } else if (src.IsXRegister()) {
-    CHECK_EQ(8u, size);
-    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-  } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
-  } else {
-    CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
-  }
-}
-
-void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
-  Arm64ManagedRegister src = m_src.AsArm64();
-  CHECK(src.IsXRegister()) << src;
-  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm,
-                                           ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
-}
-
-void Arm64Assembler::StoreImmediateToThread64(ThreadOffset<8> offs, uint32_t imm,
-                                            ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
-  vixl::Register temp = temps.AcquireX();
-  ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
-}
-
-void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
-                                   FrameOffset in_off, ManagedRegister m_scratch) {
-  Arm64ManagedRegister source = m_source.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
-}
-
-// Load routines.
-void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value,
-                                   Condition cond) {
-  if ((cond == al) || (cond == nv)) {
-    ___ Mov(reg_x(dest), value);
-  } else {
-    // temp = value
-    // rd = cond ? temp : rd
-    if (value != 0) {
-      vixl::UseScratchRegisterScope temps(vixl_masm_);
-      temps.Exclude(reg_x(dest));
-      vixl::Register temp = temps.AcquireX();
-      ___ Mov(temp, value);
-      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
-    } else {
-      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
-    }
-  }
-}
-
-void Arm64Assembler::LoadWFromOffset(LoadOperandType type, WRegister dest,
-                                     XRegister base, int32_t offset) {
-  switch (type) {
-    case kLoadSignedByte:
-      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadSignedHalfword:
-      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedByte:
-      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadUnsignedHalfword:
-      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    case kLoadWord:
-      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
-      break;
-    default:
-        LOG(FATAL) << "UNREACHABLE";
-  }
-}
-
-// Note: We can extend this member by adding load type info - see
-// sign extended A64 load variants.
-void Arm64Assembler::LoadFromOffset(XRegister dest, XRegister base,
-                                    int32_t offset) {
-  CHECK_NE(dest, SP);
-  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadSFromOffset(SRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::LoadDFromOffset(DRegister dest, XRegister base,
-                                     int32_t offset) {
-  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
-}
-
-void Arm64Assembler::Load(Arm64ManagedRegister dest, XRegister base,
-                          int32_t offset, size_t size) {
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dest;
-  } else if (dest.IsWRegister()) {
-    CHECK_EQ(4u, size) << dest;
-    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
-  } else if (dest.IsXRegister()) {
-    CHECK_NE(dest.AsXRegister(), SP) << dest;
-    if (size == 4u) {
-      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
-    } else {
-      CHECK_EQ(8u, size) << dest;
-      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
-    }
-  } else if (dest.IsSRegister()) {
-    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
-  } else {
-    CHECK(dest.IsDRegister()) << dest;
-    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
-  }
-}
-
-void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
-  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
-}
-
-void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs,
-                             bool unpoison_reference) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister base = m_base.AsArm64();
-  CHECK(dst.IsXRegister() && base.IsXRegister());
-  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
-                  offs.Int32Value());
-  if (unpoison_reference) {
-    WRegister ref_reg = dst.AsOverlappingWRegister();
-    MaybeUnpoisonHeapReference(reg_w(ref_reg));
-  }
-}
-
 void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsXRegister() && base.IsXRegister());
   // Remove dst and base from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
   ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
 }
 
-void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  CHECK(dst.IsXRegister()) << dst;
-  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
-}
-
-// Copying routines.
-void Arm64Assembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
-  Arm64ManagedRegister dst = m_dst.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  if (!dst.Equals(src)) {
-    if (dst.IsXRegister()) {
-      if (size == 4) {
-        CHECK(src.IsWRegister());
-        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
-      } else {
-        if (src.IsXRegister()) {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
-        } else {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
-        }
-      }
-    } else if (dst.IsWRegister()) {
-      CHECK(src.IsWRegister()) << src;
-      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
-    } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
-    } else {
-      CHECK(dst.IsDRegister()) << dst;
-      CHECK(src.IsDRegister()) << src;
-      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
-    }
-  }
-}
-
-void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                          ThreadOffset<8> tr_offs,
-                                          ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset<8> tr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                             ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
-                  SP, src.Int32Value());
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
-                 SP, dest.Int32Value());
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = src_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
-                   src_offset.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest_base, Offset dest_offs, FrameOffset src,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister base = m_dest_base.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
-                   dest_offs.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                          ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::Copy(ManagedRegister m_dest, Offset dest_offset,
-                          ManagedRegister m_src, Offset src_offset,
-                          ManagedRegister m_scratch, size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64ManagedRegister src = m_src.AsArm64();
-  Arm64ManagedRegister dest = m_dest.AsArm64();
-  CHECK(dest.IsXRegister()) << dest;
-  CHECK(src.IsXRegister()) << src;
-  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    if (scratch.IsWRegister()) {
-      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    } else {
-      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
-                    src_offset.Int32Value());
-      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
-                   dest_offset.Int32Value());
-    }
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
-  }
-}
-
-void Arm64Assembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/,
-                          FrameOffset /*src*/, Offset /*src_offset*/,
-                          ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
-  // TODO: Should we check that m_scratch is IP? - see arm.
-  ___ Dmb(vixl::InnerShareable, vixl::BarrierAll);
-}
-
-void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  Arm64ManagedRegister reg = mreg.AsArm64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsWRegister()) << reg;
-  if (size == 1) {
-    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  } else {
-    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
-  }
-}
-
-void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references.
-}
-
-void Arm64Assembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
 void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
   Arm64ManagedRegister base = m_base.AsArm64();
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
   CHECK(scratch.IsXRegister()) << scratch;
  // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(&vixl_masm_);
   temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
 }
 
-void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
-}
-
-void Arm64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(
-    ManagedRegister m_out_reg, FrameOffset handle_scope_offs, ManagedRegister m_in_reg,
-    bool null_allowed) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  // For now we only hold stale handle scope entries in x registers.
-  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  if (null_allowed) {
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
-    if (in_reg.IsNoRegister()) {
-      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
-                      handle_scope_offs.Int32Value());
-      in_reg = out_reg;
-    }
-    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
-    if (!out_reg.Equals(in_reg)) {
-      LoadImmediate(out_reg.AsXRegister(), 0, eq);
-    }
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
-  } else {
-    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
-  }
-}
-
-void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset,
-                                            ManagedRegister m_scratch, bool null_allowed) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  if (null_allowed) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
-                    handle_scope_offset.Int32Value());
-    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
-    // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
-    // Move this logic in add constants with flags.
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
-  } else {
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
-  }
-  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
-}
-
-void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
-                                                  ManagedRegister m_in_reg) {
-  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
-  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  CHECK(out_reg.IsXRegister()) << out_reg;
-  CHECK(in_reg.IsXRegister()) << in_reg;
-  vixl::Label exit;
-  if (!out_reg.Equals(in_reg)) {
-    // FIXME: Who sets the flags here?
-    LoadImmediate(out_reg.AsXRegister(), 0, eq);
-  }
-  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
-  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
-  ___ Bind(&exit);
-}
-
-void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
-  CHECK_ALIGNED(stack_adjust, kStackAlignment);
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
-  LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
-}
-
-void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
-  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  vixl::Register temp = temps.AcquireX();
-
-  // Bind exception poll entry.
-  ___ Bind(exception->Entry());
-  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
-    DecreaseFrameSize(exception->stack_adjust_);
-  }
-  // Pass exception object as argument.
-  // Don't care about preserving X0 as this won't return.
-  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
-
-  ___ Blr(temp);
-  // Call should never return.
-  ___ Brk();
-}
-
 static inline dwarf::Reg DWARFReg(CPURegister reg) {
   if (reg.IsFPRegister()) {
-    return dwarf::Reg::Arm64Fp(reg.code());
+    return dwarf::Reg::Arm64Fp(reg.GetCode());
   } else {
-    DCHECK_LT(reg.code(), 31u);  // X0 - X30.
-    return dwarf::Reg::Arm64Core(reg.code());
+    DCHECK_LT(reg.GetCode(), 31u);  // X0 - X30.
+    return dwarf::Reg::Arm64Core(reg.GetCode());
   }
 }
 
-void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
-  while (registers.Count() >= 2) {
+void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
+  const Register sp = vixl_masm_.StackPointer();
+  // Since we are operating on register pairs, we would like to align on
+  // double the standard size; on the other hand, we don't want to insert
+  // an extra store, which will happen if the number of registers is even.
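+  // For example (illustration only): with an odd register count and an offset
+  // that is size-aligned but not 2*size-aligned, one leading single store
+  // realigns the offset so every remaining pair can be stored with Stp.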
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Str(dst0, MemOperand(sp, offset));
+    cfi_.RelOffset(DWARFReg(dst0), offset);
+    offset += size;
+  }
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Stp(dst0, dst1, MemOperand(sp, offset));
@@ -664,10 +108,17 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
-  const Register sp = vixl_masm_->StackPointer();
-  while (registers.Count() >= 2) {
+void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
+  const Register sp = vixl_masm_.StackPointer();
+  // Be consistent with the logic for spilling registers.
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Ldr(dst0, MemOperand(sp, offset));
+    cfi_.Restore(DWARFReg(dst0));
+    offset += size;
+  }
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Ldp(dst0, dst1, MemOperand(sp, offset));
@@ -683,117 +134,25 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                const std::vector<ManagedRegister>& callee_save_regs,
-                                const ManagedRegisterEntrySpills& entry_spills) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
-    }
-  }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
-
-  // Increase frame to required size.
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize);
-  IncreaseFrameSize(frame_size);
-
-  // Save callee-saves.
-  SpillRegisters(core_reg_list, frame_size - core_reg_size);
-  SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  // Write ArtMethod*
-  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
-  StoreToOffset(X0, SP, 0);
-
-  // Write out entry spills
-  int32_t offset = frame_size + kArm64PointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsXRegister()) {
-      StoreToOffset(reg.AsXRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsWRegister()) {
-      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    }
-  }
-}
-
-void Arm64Assembler::RemoveFrame(size_t frame_size,
-                                 const std::vector<ManagedRegister>& callee_save_regs) {
-  // Setup VIXL CPURegList for callee-saves.
-  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
-  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
-  for (auto r : callee_save_regs) {
-    Arm64ManagedRegister reg = r.AsArm64();
-    if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
-    } else {
-      DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
-    }
-  }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
-
-  // For now we only check that the size of the frame is large enough to hold spills and method
-  // reference.
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize);
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  cfi_.RememberState();
-
-  // Restore callee-saves.
-  UnspillRegisters(core_reg_list, frame_size - core_reg_size);
-  UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
-
-  // Decrease frame size to start of callee saved regs.
-  DecreaseFrameSize(frame_size);
-
-  // Pop callee saved and return to LR.
-  ___ Ret();
-
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void Arm64Assembler::PoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::PoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::UnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::UnpoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::MaybeUnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::MaybePoisonHeapReference(Register reg) {
+  if (kPoisonHeapReferences) {
+    PoisonHeapReference(reg);
+  }
+}
+
+void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
   }
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index c4e5de7..66a7fed 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -23,24 +23,21 @@
 
 #include "base/arena_containers.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/assembler.h"
 #include "offsets.h"
 
-// TODO: make vixl clean wrt -Wshadow.
+// TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunknown-pragmas"
 #pragma GCC diagnostic ignored "-Wshadow"
-#pragma GCC diagnostic ignored "-Wmissing-noreturn"
-#include "vixl/a64/macro-assembler-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-#define MEM_OP(...)      vixl::MemOperand(__VA_ARGS__)
+#define MEM_OP(...)      vixl::aarch64::MemOperand(__VA_ARGS__)
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -62,38 +59,13 @@
   kStoreDWord
 };
 
-class Arm64Exception {
- private:
-  Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-    }
-
-  vixl::Label* Entry() { return &exception_entry_; }
-
-  // Register used for passing Thread::Current()->exception_ .
-  const Arm64ManagedRegister scratch_;
-
-  // Stack adjust for ExceptionPool.
-  const size_t stack_adjust_;
-
-  vixl::Label exception_entry_;
-
-  friend class Arm64Assembler;
-  DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
-};
-
 class Arm64Assembler FINAL : public Assembler {
  public:
-  // We indicate the size of the initial code generation buffer to the VIXL
-  // assembler. From there we it will automatically manage the buffer.
-  explicit Arm64Assembler(ArenaAllocator* arena)
-      : Assembler(arena),
-        exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
-        vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
+  explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena) {}
 
-  virtual ~Arm64Assembler() {
-    delete vixl_masm_;
-  }
+  virtual ~Arm64Assembler() {}
+
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
 
   // Finalize the code.
   void FinalizeCode() OVERRIDE;
@@ -105,115 +77,26 @@
   // Copy instructions out of assembly buffer into the given region of memory.
   void FinalizeInstructions(const MemoryRegion& region);
 
-  void SpillRegisters(vixl::CPURegList registers, int offset);
-  void UnspillRegisters(vixl::CPURegList registers, int offset);
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs);
 
-  // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines.
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines.
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
-
-  // Copying routines.
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
-  // Sign extension.
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension.
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                       ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                       ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst.
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;
+  void SpillRegisters(vixl::aarch64::CPURegList registers, int offset);
+  void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset);
 
   // Jump to address (not setting link register)
   void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch);
 
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   //
   // Heap poisoning.
   //
 
   // Poison a heap reference contained in `reg`.
-  void PoisonHeapReference(vixl::Register reg);
+  void PoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg`.
-  void UnpoisonHeapReference(vixl::Register reg);
+  void UnpoisonHeapReference(vixl::aarch64::Register reg);
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
-  void MaybeUnpoisonHeapReference(vixl::Register reg);
+  void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
 
   void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
@@ -222,60 +105,37 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64";
   }
 
- private:
-  static vixl::Register reg_x(int code) {
+  static vixl::aarch64::Register reg_x(int code) {
     CHECK(code < kNumberOfXRegisters) << code;
     if (code == SP) {
-      return vixl::sp;
+      return vixl::aarch64::sp;
     } else if (code == XZR) {
-      return vixl::xzr;
+      return vixl::aarch64::xzr;
     }
-    return vixl::Register::XRegFromCode(code);
+    return vixl::aarch64::Register::GetXRegFromCode(code);
   }
 
-  static vixl::Register reg_w(int code) {
+  static vixl::aarch64::Register reg_w(int code) {
     CHECK(code < kNumberOfWRegisters) << code;
     if (code == WSP) {
-      return vixl::wsp;
+      return vixl::aarch64::wsp;
     } else if (code == WZR) {
-      return vixl::wzr;
+      return vixl::aarch64::wzr;
     }
-    return vixl::Register::WRegFromCode(code);
+    return vixl::aarch64::Register::GetWRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_d(int code) {
-    return vixl::FPRegister::DRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_d(int code) {
+    return vixl::aarch64::FPRegister::GetDRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_s(int code) {
-    return vixl::FPRegister::SRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_s(int code) {
+    return vixl::aarch64::FPRegister::GetSRegFromCode(code);
   }
 
-  // Emits Exception block.
-  void EmitExceptionPoll(Arm64Exception *exception);
-
-  void StoreWToOffset(StoreOperandType type, WRegister source,
-                      XRegister base, int32_t offset);
-  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
-  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
-  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
-
-  void LoadImmediate(XRegister dest, int32_t value, vixl::Condition cond = vixl::al);
-  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
-  void LoadWFromOffset(LoadOperandType type, WRegister dest,
-                      XRegister base, int32_t offset);
-  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
-  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
-  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
-  void AddConstant(XRegister rd, int32_t value, vixl::Condition cond = vixl::al);
-  void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
-
-  // List of exception blocks to generate at the end of the code cache.
-  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
-
- public:
-  // Vixl assembler.
-  vixl::MacroAssembler* const vixl_masm_;
+ private:
+  // VIXL assembler.
+  vixl::aarch64::MacroAssembler vixl_masm_;
 
   // Used for testing.
   friend class Arm64ManagedRegister_VixlRegisters_Test;
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
deleted file mode 100644
index 01e8be9..0000000
--- a/compiler/utils/arm64/constants_arm64.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
-
-#include <stdint.h>
-#include <iosfwd>
-#include "arch/arm64/registers_arm64.h"
-#include "base/casts.h"
-#include "base/logging.h"
-#include "globals.h"
-
-// TODO: Extend this file by adding missing functionality.
-
-namespace art {
-namespace arm64 {
-
-constexpr size_t kArm64BaseBufferSize = 4096;
-
-}  // namespace arm64
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
new file mode 100644
index 0000000..dfdcd11
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -0,0 +1,754 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_arm64.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "managed_register_arm64.h"
+#include "offsets.h"
+#include "thread.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+#ifdef ___
+#error "ARM64 Assembler macro already defined."
+#else
+#define ___   asm_.GetVIXLAssembler()->
+#endif
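+// '___' forwards to the wrapped VIXL macro assembler, so '___ Mov(a, b)'
+// expands to 'asm_.GetVIXLAssembler()->Mov(a, b)'.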
+
+#define reg_x(X) Arm64Assembler::reg_x(X)
+#define reg_w(W) Arm64Assembler::reg_w(W)
+#define reg_d(D) Arm64Assembler::reg_d(D)
+#define reg_s(S) Arm64Assembler::reg_s(S)
+
+Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() {
+}
+
+void Arm64JNIMacroAssembler::FinalizeCode() {
+  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
+    EmitExceptionPoll(exception.get());
+  }
+  ___ FinalizeCode();
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
+}
+
+void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
+  StoreToOffset(TR, SP, offset.Int32Value());
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
+}
+
+// See Arm64 PCS Section 5.2.2.1.
+void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  AddConstant(SP, adjust);
+  cfi().AdjustCFAOffset(-adjust);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+void Arm64JNIMacroAssembler::AddConstant(XRegister rd,
+                                         XRegister rn,
+                                         int32_t value,
+                                         Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    // VIXL macro-assembler handles all variants.
+    ___ Add(reg_x(rd), reg_x(rn), value);
+  } else {
+    // temp = rn + value
+    // rd = cond ? temp : rd
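+    // Exclude rd and rn so the scratch register acquired below cannot alias them.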
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    temps.Exclude(reg_x(rd), reg_x(rn));
+    Register temp = temps.AcquireX();
+    ___ Add(temp, reg_x(rn), value);
+    ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreWToOffset(StoreOperandType type,
+                                            WRegister source,
+                                            XRegister base,
+                                            int32_t offset) {
+  switch (type) {
+    case kStoreByte:
+      ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreHalfword:
+      ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    case kStoreWord:
+      ___ Str(reg_w(source), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) {
+  CHECK_NE(source, SP);
+  ___ Str(reg_x(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_s(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) {
+  ___ Str(reg_d(source), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsWRegister()) {
+    CHECK_EQ(4u, size);
+    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
+  } else if (src.IsXRegister()) {
+    CHECK_EQ(8u, size);
+    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+  } else if (src.IsSRegister()) {
+    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
+  } else {
+    CHECK(src.IsDRegister()) << src;
+    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
+  }
+}
+
+void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
+  Arm64ManagedRegister src = m_src.AsArm64();
+  CHECK(src.IsXRegister()) << src;
+  StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs,
+                                                   uint32_t imm,
+                                                   ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadImmediate(scratch.AsXRegister(), imm);
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
+                 offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
+                                                      FrameOffset fr_offs,
+                                                      ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register temp = temps.AcquireX();
+  ___ Mov(temp, reg_x(SP));
+  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+}
+
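+// Store the pointer in m_source at dest_off, then copy the value found at
+// in_off to dest_off + 8 (using m_scratch for the intermediate load).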
+void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off,
+                                           ManagedRegister m_source,
+                                           FrameOffset in_off,
+                                           ManagedRegister m_scratch) {
+  Arm64ManagedRegister source = m_source.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
+}
+
+// Load routines.
+void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) {
+  if ((cond == al) || (cond == nv)) {
+    ___ Mov(reg_x(dest), value);
+  } else {
+    // temp = value
+    // rd = cond ? temp : rd
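+    // When value is zero, XZR is selected directly and no scratch register is needed.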
+    if (value != 0) {
+      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+      temps.Exclude(reg_x(dest));
+      Register temp = temps.AcquireX();
+      ___ Mov(temp, value);
+      ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
+    } else {
+      ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond);
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadWFromOffset(LoadOperandType type,
+                                             WRegister dest,
+                                             XRegister base,
+                                             int32_t offset) {
+  switch (type) {
+    case kLoadSignedByte:
+      ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadSignedHalfword:
+      ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedByte:
+      ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadUnsignedHalfword:
+      ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    case kLoadWord:
+      ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset));
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+// Note: This member can be extended with load type info to cover the
+// sign-extended A64 load variants.
+void Arm64JNIMacroAssembler::LoadFromOffset(XRegister dest, XRegister base, int32_t offset) {
+  CHECK_NE(dest, SP);
+  ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadSFromOffset(SRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::LoadDFromOffset(DRegister dest, XRegister base, int32_t offset) {
+  ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset));
+}
+
+void Arm64JNIMacroAssembler::Load(Arm64ManagedRegister dest,
+                                  XRegister base,
+                                  int32_t offset,
+                                  size_t size) {
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dest;
+  } else if (dest.IsWRegister()) {
+    CHECK_EQ(4u, size) << dest;
+    ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
+  } else if (dest.IsXRegister()) {
+    CHECK_NE(dest.AsXRegister(), SP) << dest;
+    if (size == 4u) {
+      ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
+    } else {
+      CHECK_EQ(8u, size) << dest;
+      ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
+    }
+  } else if (dest.IsSRegister()) {
+    ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
+  } else {
+    CHECK(dest.IsDRegister()) << dest;
+    ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset));
+  }
+}
+
+void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
+  return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
+                                            ThreadOffset64 src,
+                                            size_t size) {
+  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst,
+                                     ManagedRegister m_base,
+                                     MemberOffset offs,
+                                     bool unpoison_reference) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
+                  offs.Int32Value());
+  if (unpoison_reference) {
+    WRegister ref_reg = dst.AsOverlappingWRegister();
+    asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg));
+  }
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst,
+                                        ManagedRegister m_base,
+                                        Offset offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister base = m_base.AsArm64();
+  CHECK(dst.IsXRegister() && base.IsXRegister());
+  // Remove dst and base from the temp list - higher level API uses IP1, IP0.
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
+  ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  CHECK(dst.IsXRegister()) << dst;
+  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
+}
+
+// Copying routines.
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  if (!dst.Equals(src)) {
+    if (dst.IsXRegister()) {
+      if (size == 4) {
+        CHECK(src.IsWRegister());
+        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
+      } else {
+        if (src.IsXRegister()) {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
+        } else {
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
+        }
+      }
+    } else if (dst.IsWRegister()) {
+      CHECK(src.IsWRegister()) << src;
+      ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
+    } else if (dst.IsSRegister()) {
+      CHECK(src.IsSRegister()) << src;
+      ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister()));
+    } else {
+      CHECK(dst.IsDRegister()) << dst;
+      CHECK(src.IsDRegister()) << src;
+      ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister()));
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                  ThreadOffset64 tr_offs,
+                                                  ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
+                                                FrameOffset fr_offs,
+                                                ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
+                  SP, src.Int32Value());
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
+                 SP, dest.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
+                                  ManagedRegister src_base,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = src_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
+                   src_offset.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base,
+                                  Offset dest_offs,
+                                  FrameOffset src,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister base = m_dest_base.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
+                   dest_offs.Int32Value());
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  FrameOffset /*src_base*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*mscratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest,
+                                  Offset dest_offset,
+                                  ManagedRegister m_src,
+                                  Offset src_offset,
+                                  ManagedRegister m_scratch,
+                                  size_t size) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  Arm64ManagedRegister src = m_src.AsArm64();
+  Arm64ManagedRegister dest = m_dest.AsArm64();
+  CHECK(dest.IsXRegister()) << dest;
+  CHECK(src.IsXRegister()) << src;
+  CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(size == 4 || size == 8) << size;
+  if (size == 4) {
+    if (scratch.IsWRegister()) {
+      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
+                    src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
+                   dest_offset.Int32Value());
+    } else {
+      LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
+                    src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
+                   dest_offset.Int32Value());
+    }
+  } else if (size == 8) {
+    LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
+    StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
+  } else {
+    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+  }
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                  Offset /*dest_offset*/,
+                                  FrameOffset /*src*/,
+                                  Offset /*src_offset*/,
+                                  ManagedRegister /*scratch*/,
+                                  size_t /*size*/) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
+}
+
+void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
+  // TODO: Should we check that m_scratch is IP? - see arm.
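+  // Dmb with InnerShareable/BarrierAll orders all memory accesses against other
+  // observers in the inner shareable domain.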
+  ___ Dmb(InnerShareable, BarrierAll);
+}
+
+void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references.
+}
+
+void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister base = m_base.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
+  ___ Blr(reg_x(scratch.AsXRegister()));
+}
+
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                            ManagedRegister scratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister m_out_reg,
+                                                    FrameOffset handle_scope_offs,
+                                                    ManagedRegister m_in_reg,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  // For now we only hold stale handle scope entries in x registers.
+  CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  if (null_allowed) {
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
+                      handle_scope_offs.Int32Value());
+      in_reg = out_reg;
+    }
+    ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
+    if (!out_reg.Equals(in_reg)) {
+      LoadImmediate(out_reg.AsXRegister(), 0, eq);
+    }
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne);
+  } else {
+    AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al);
+  }
+}
+
+void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                    FrameOffset handle_scope_offset,
+                                                    ManagedRegister m_scratch,
+                                                    bool null_allowed) {
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(scratch.IsXRegister()) << scratch;
+  if (null_allowed) {
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
+                    handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
+    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
+    // TODO: Move this logic into an AddConstant() variant that handles flags.
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
+  } else {
+    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
+  }
+  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
+}
+
+void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
+                                                          ManagedRegister m_in_reg) {
+  Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
+  Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
+  CHECK(out_reg.IsXRegister()) << out_reg;
+  CHECK(in_reg.IsXRegister()) << in_reg;
+  vixl::aarch64::Label exit;
+  if (!out_reg.Equals(in_reg)) {
+    // FIXME: Who sets the flags here?
+    LoadImmediate(out_reg.AsXRegister(), 0, eq);
+  }
+  ___ Cbz(reg_x(in_reg.AsXRegister()), &exit);
+  LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0);
+  ___ Bind(&exit);
+}
+
+void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
+  CHECK_ALIGNED(stack_adjust, kStackAlignment);
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
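+  // Check Thread::Current()->exception_ and branch to the slow-path block when it is non-null.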
+  LoadFromOffset(scratch.AsXRegister(),
+                 TR,
+                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
+  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
+}
+
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
+  Register temp = temps.AcquireX();
+
+  // Bind exception poll entry.
+  ___ Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
+  }
+  // Pass exception object as argument.
+  // Don't care about preserving X0 as this won't return.
+  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
+  ___ Ldr(temp,
+          MEM_OP(reg_x(TR),
+                 QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
+
+  ___ Blr(temp);
+  // Call should never return.
+  ___ Brk();
+}
+
+void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                        ManagedRegister method_reg,
+                                        ArrayRef<const ManagedRegister> callee_save_regs,
+                                        const ManagedRegisterEntrySpills& entry_spills) {
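+  // Frame layout: callee-saved core registers at the top of the frame, FP callee-saves
+  // just below them, the ArtMethod* at SP + 0, and the entry spills stored above the
+  // frame starting at SP + frame_size + sizeof(ArtMethod*).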
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // Increase frame to required size.
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  IncreaseFrameSize(frame_size);
+
+  // Save callee-saves.
+  asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  // Write ArtMethod*
+  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
+  StoreToOffset(X0, SP, 0);
+
+  // Write out entry spills
+  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsXRegister()) {
+      StoreToOffset(reg.AsXRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsWRegister()) {
+      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsSRegister()) {
+      StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    }
+  }
+}
+
+void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                         ArrayRef<const ManagedRegister> callee_save_regs) {
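+  // Mirror image of BuildFrame: restore the callee-saves, tear down the frame, and return.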
+  // Setup VIXL CPURegList for callee-saves.
+  CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
+  CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
+  for (auto r : callee_save_regs) {
+    Arm64ManagedRegister reg = r.AsArm64();
+    if (reg.IsXRegister()) {
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
+    } else {
+      DCHECK(reg.IsDRegister());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
+    }
+  }
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
+
+  // For now, only check that the frame is large enough to hold the spills and the method
+  // reference.
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  DCHECK_ALIGNED(frame_size, kStackAlignment);
+
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+
+  cfi().RememberState();
+
+  // Restore callee-saves.
+  asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size);
+  asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
+
+  // Decrease frame size to start of callee saved regs.
+  DecreaseFrameSize(frame_size);
+
+  // Pop the callee-saved registers and return to LR.
+  ___ Ret();
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+#undef ___
+
+}  // namespace arm64
+}  // namespace art
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
new file mode 100644
index 0000000..b9f6854
--- /dev/null
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+#define ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
+
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "assembler_arm64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+#include "offsets.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
+class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> {
+ public:
+  explicit Arm64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd(arena),
+        exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {}
+
+  ~Arm64JNIMacroAssembler();
+
+  // Finalize the code.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit code that will create an activation on the stack.
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack.
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines.
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines.
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines.
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+  // Sign extension.
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension.
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current().
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load it into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset].
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
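+  // Bookkeeping for a pending exception check: the scratch register holding the exception
+  // and the stack adjustment to undo before delivering it (see EmitExceptionPoll).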
+  class Arm64Exception {
+   public:
+    Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+        : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+    vixl::aarch64::Label* Entry() { return &exception_entry_; }
+
+    // Register used for passing Thread::Current()->exception_.
+    const Arm64ManagedRegister scratch_;
+
+    // Stack adjustment for ExceptionPoll.
+    const size_t stack_adjust_;
+
+    vixl::aarch64::Label exception_entry_;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
+  };
+
+  // Emits the exception block.
+  void EmitExceptionPoll(Arm64Exception* exception);
+
+  void StoreWToOffset(StoreOperandType type,
+                      WRegister source,
+                      XRegister base,
+                      int32_t offset);
+  void StoreToOffset(XRegister source, XRegister base, int32_t offset);
+  void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
+  void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
+
+  void LoadImmediate(XRegister dest,
+                     int32_t value,
+                     vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
+  void LoadWFromOffset(LoadOperandType type,
+                       WRegister dest,
+                       XRegister base,
+                       int32_t offset);
+  void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
+  void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
+  void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
+  void AddConstant(XRegister rd,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void AddConstant(XRegister rd,
+                   XRegister rn,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+
+  // List of exception blocks to generate at the end of the code cache.
+  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 46be1c5..7378a0a 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 
+#include "arch/arm64/registers_arm64.h"
 #include "base/logging.h"
-#include "constants_arm64.h"
 #include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
@@ -56,80 +56,80 @@
 
 class Arm64ManagedRegister : public ManagedRegister {
  public:
-  XRegister AsXRegister() const {
+  constexpr XRegister AsXRegister() const {
     CHECK(IsXRegister());
     return static_cast<XRegister>(id_);
   }
 
-  WRegister AsWRegister() const {
+  constexpr WRegister AsWRegister() const {
     CHECK(IsWRegister());
     return static_cast<WRegister>(id_ - kNumberOfXRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds -
                                   kNumberOfDRegIds);
   }
 
-  WRegister AsOverlappingWRegister() const {
+  constexpr WRegister AsOverlappingWRegister() const {
     CHECK(IsValidManagedRegister());
     if (IsZeroRegister()) return WZR;
     return static_cast<WRegister>(AsXRegister());
   }
 
-  XRegister AsOverlappingXRegister() const {
+  constexpr XRegister AsOverlappingXRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<XRegister>(AsWRegister());
   }
 
-  SRegister AsOverlappingSRegister() const {
+  constexpr SRegister AsOverlappingSRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<SRegister>(AsDRegister());
   }
 
-  DRegister AsOverlappingDRegister() const {
+  constexpr DRegister AsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<DRegister>(AsSRegister());
   }
 
-  bool IsXRegister() const {
+  constexpr bool IsXRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfXRegIds);
   }
 
-  bool IsWRegister() const {
+  constexpr bool IsWRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfXRegIds;
     return (0 <= test) && (test < kNumberOfWRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsGPRegister() const {
+  constexpr bool IsGPRegister() const {
     return IsXRegister() || IsWRegister();
   }
 
-  bool IsFPRegister() const {
+  constexpr bool IsFPRegister() const {
     return IsDRegister() || IsSRegister();
   }
 
-  bool IsSameType(Arm64ManagedRegister test) const {
+  constexpr bool IsSameType(Arm64ManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsXRegister() && test.IsXRegister()) ||
@@ -145,53 +145,53 @@
 
   void Print(std::ostream& os) const;
 
-  static Arm64ManagedRegister FromXRegister(XRegister r) {
+  static constexpr Arm64ManagedRegister FromXRegister(XRegister r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static Arm64ManagedRegister FromWRegister(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegister(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r + kNumberOfXRegIds);
   }
 
-  static Arm64ManagedRegister FromDRegister(DRegister r) {
+  static constexpr Arm64ManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
-  static Arm64ManagedRegister FromSRegister(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds +
                           kNumberOfDRegIds));
   }
 
   // Returns the X register overlapping W register r.
-  static Arm64ManagedRegister FromWRegisterX(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegisterX(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r);
   }
 
   // Return the D register overlapping S register r.
-  static Arm64ManagedRegister FromSRegisterD(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegisterD(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  bool IsStackPointer() const {
+  constexpr bool IsStackPointer() const {
     return IsXRegister() && (id_ == SP);
   }
 
-  bool IsZeroRegister() const {
+  constexpr bool IsZeroRegister() const {
     return IsXRegister() && (id_ == XZR);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Arm64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Arm64ManagedRegister FromRegId(int reg_id) {
     Arm64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace arm64
 
-inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
+constexpr inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
   arm64::Arm64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index e27115d..79076b8 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -591,149 +591,149 @@
 
 TEST(Arm64ManagedRegister, VixlRegisters) {
   // X Registers.
-  EXPECT_TRUE(vixl::x0.Is(Arm64Assembler::reg_x(X0)));
-  EXPECT_TRUE(vixl::x1.Is(Arm64Assembler::reg_x(X1)));
-  EXPECT_TRUE(vixl::x2.Is(Arm64Assembler::reg_x(X2)));
-  EXPECT_TRUE(vixl::x3.Is(Arm64Assembler::reg_x(X3)));
-  EXPECT_TRUE(vixl::x4.Is(Arm64Assembler::reg_x(X4)));
-  EXPECT_TRUE(vixl::x5.Is(Arm64Assembler::reg_x(X5)));
-  EXPECT_TRUE(vixl::x6.Is(Arm64Assembler::reg_x(X6)));
-  EXPECT_TRUE(vixl::x7.Is(Arm64Assembler::reg_x(X7)));
-  EXPECT_TRUE(vixl::x8.Is(Arm64Assembler::reg_x(X8)));
-  EXPECT_TRUE(vixl::x9.Is(Arm64Assembler::reg_x(X9)));
-  EXPECT_TRUE(vixl::x10.Is(Arm64Assembler::reg_x(X10)));
-  EXPECT_TRUE(vixl::x11.Is(Arm64Assembler::reg_x(X11)));
-  EXPECT_TRUE(vixl::x12.Is(Arm64Assembler::reg_x(X12)));
-  EXPECT_TRUE(vixl::x13.Is(Arm64Assembler::reg_x(X13)));
-  EXPECT_TRUE(vixl::x14.Is(Arm64Assembler::reg_x(X14)));
-  EXPECT_TRUE(vixl::x15.Is(Arm64Assembler::reg_x(X15)));
-  EXPECT_TRUE(vixl::x16.Is(Arm64Assembler::reg_x(X16)));
-  EXPECT_TRUE(vixl::x17.Is(Arm64Assembler::reg_x(X17)));
-  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(X18)));
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(X19)));
-  EXPECT_TRUE(vixl::x20.Is(Arm64Assembler::reg_x(X20)));
-  EXPECT_TRUE(vixl::x21.Is(Arm64Assembler::reg_x(X21)));
-  EXPECT_TRUE(vixl::x22.Is(Arm64Assembler::reg_x(X22)));
-  EXPECT_TRUE(vixl::x23.Is(Arm64Assembler::reg_x(X23)));
-  EXPECT_TRUE(vixl::x24.Is(Arm64Assembler::reg_x(X24)));
-  EXPECT_TRUE(vixl::x25.Is(Arm64Assembler::reg_x(X25)));
-  EXPECT_TRUE(vixl::x26.Is(Arm64Assembler::reg_x(X26)));
-  EXPECT_TRUE(vixl::x27.Is(Arm64Assembler::reg_x(X27)));
-  EXPECT_TRUE(vixl::x28.Is(Arm64Assembler::reg_x(X28)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
-  EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
+  EXPECT_TRUE(vixl::aarch64::x0.Is(Arm64Assembler::reg_x(X0)));
+  EXPECT_TRUE(vixl::aarch64::x1.Is(Arm64Assembler::reg_x(X1)));
+  EXPECT_TRUE(vixl::aarch64::x2.Is(Arm64Assembler::reg_x(X2)));
+  EXPECT_TRUE(vixl::aarch64::x3.Is(Arm64Assembler::reg_x(X3)));
+  EXPECT_TRUE(vixl::aarch64::x4.Is(Arm64Assembler::reg_x(X4)));
+  EXPECT_TRUE(vixl::aarch64::x5.Is(Arm64Assembler::reg_x(X5)));
+  EXPECT_TRUE(vixl::aarch64::x6.Is(Arm64Assembler::reg_x(X6)));
+  EXPECT_TRUE(vixl::aarch64::x7.Is(Arm64Assembler::reg_x(X7)));
+  EXPECT_TRUE(vixl::aarch64::x8.Is(Arm64Assembler::reg_x(X8)));
+  EXPECT_TRUE(vixl::aarch64::x9.Is(Arm64Assembler::reg_x(X9)));
+  EXPECT_TRUE(vixl::aarch64::x10.Is(Arm64Assembler::reg_x(X10)));
+  EXPECT_TRUE(vixl::aarch64::x11.Is(Arm64Assembler::reg_x(X11)));
+  EXPECT_TRUE(vixl::aarch64::x12.Is(Arm64Assembler::reg_x(X12)));
+  EXPECT_TRUE(vixl::aarch64::x13.Is(Arm64Assembler::reg_x(X13)));
+  EXPECT_TRUE(vixl::aarch64::x14.Is(Arm64Assembler::reg_x(X14)));
+  EXPECT_TRUE(vixl::aarch64::x15.Is(Arm64Assembler::reg_x(X15)));
+  EXPECT_TRUE(vixl::aarch64::x16.Is(Arm64Assembler::reg_x(X16)));
+  EXPECT_TRUE(vixl::aarch64::x17.Is(Arm64Assembler::reg_x(X17)));
+  EXPECT_TRUE(vixl::aarch64::x18.Is(Arm64Assembler::reg_x(X18)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(X19)));
+  EXPECT_TRUE(vixl::aarch64::x20.Is(Arm64Assembler::reg_x(X20)));
+  EXPECT_TRUE(vixl::aarch64::x21.Is(Arm64Assembler::reg_x(X21)));
+  EXPECT_TRUE(vixl::aarch64::x22.Is(Arm64Assembler::reg_x(X22)));
+  EXPECT_TRUE(vixl::aarch64::x23.Is(Arm64Assembler::reg_x(X23)));
+  EXPECT_TRUE(vixl::aarch64::x24.Is(Arm64Assembler::reg_x(X24)));
+  EXPECT_TRUE(vixl::aarch64::x25.Is(Arm64Assembler::reg_x(X25)));
+  EXPECT_TRUE(vixl::aarch64::x26.Is(Arm64Assembler::reg_x(X26)));
+  EXPECT_TRUE(vixl::aarch64::x27.Is(Arm64Assembler::reg_x(X27)));
+  EXPECT_TRUE(vixl::aarch64::x28.Is(Arm64Assembler::reg_x(X28)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(X29)));
+  EXPECT_TRUE(vixl::aarch64::x30.Is(Arm64Assembler::reg_x(X30)));
 
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR)));
-  EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
-  EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
-  EXPECT_TRUE(vixl::lr.Is(Arm64Assembler::reg_x(LR)));
-  EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(SP)));
-  EXPECT_TRUE(vixl::xzr.Is(Arm64Assembler::reg_x(XZR)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::aarch64::ip0.Is(Arm64Assembler::reg_x(IP0)));
+  EXPECT_TRUE(vixl::aarch64::ip1.Is(Arm64Assembler::reg_x(IP1)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(FP)));
+  EXPECT_TRUE(vixl::aarch64::lr.Is(Arm64Assembler::reg_x(LR)));
+  EXPECT_TRUE(vixl::aarch64::sp.Is(Arm64Assembler::reg_x(SP)));
+  EXPECT_TRUE(vixl::aarch64::xzr.Is(Arm64Assembler::reg_x(XZR)));
 
   // W Registers.
-  EXPECT_TRUE(vixl::w0.Is(Arm64Assembler::reg_w(W0)));
-  EXPECT_TRUE(vixl::w1.Is(Arm64Assembler::reg_w(W1)));
-  EXPECT_TRUE(vixl::w2.Is(Arm64Assembler::reg_w(W2)));
-  EXPECT_TRUE(vixl::w3.Is(Arm64Assembler::reg_w(W3)));
-  EXPECT_TRUE(vixl::w4.Is(Arm64Assembler::reg_w(W4)));
-  EXPECT_TRUE(vixl::w5.Is(Arm64Assembler::reg_w(W5)));
-  EXPECT_TRUE(vixl::w6.Is(Arm64Assembler::reg_w(W6)));
-  EXPECT_TRUE(vixl::w7.Is(Arm64Assembler::reg_w(W7)));
-  EXPECT_TRUE(vixl::w8.Is(Arm64Assembler::reg_w(W8)));
-  EXPECT_TRUE(vixl::w9.Is(Arm64Assembler::reg_w(W9)));
-  EXPECT_TRUE(vixl::w10.Is(Arm64Assembler::reg_w(W10)));
-  EXPECT_TRUE(vixl::w11.Is(Arm64Assembler::reg_w(W11)));
-  EXPECT_TRUE(vixl::w12.Is(Arm64Assembler::reg_w(W12)));
-  EXPECT_TRUE(vixl::w13.Is(Arm64Assembler::reg_w(W13)));
-  EXPECT_TRUE(vixl::w14.Is(Arm64Assembler::reg_w(W14)));
-  EXPECT_TRUE(vixl::w15.Is(Arm64Assembler::reg_w(W15)));
-  EXPECT_TRUE(vixl::w16.Is(Arm64Assembler::reg_w(W16)));
-  EXPECT_TRUE(vixl::w17.Is(Arm64Assembler::reg_w(W17)));
-  EXPECT_TRUE(vixl::w18.Is(Arm64Assembler::reg_w(W18)));
-  EXPECT_TRUE(vixl::w19.Is(Arm64Assembler::reg_w(W19)));
-  EXPECT_TRUE(vixl::w20.Is(Arm64Assembler::reg_w(W20)));
-  EXPECT_TRUE(vixl::w21.Is(Arm64Assembler::reg_w(W21)));
-  EXPECT_TRUE(vixl::w22.Is(Arm64Assembler::reg_w(W22)));
-  EXPECT_TRUE(vixl::w23.Is(Arm64Assembler::reg_w(W23)));
-  EXPECT_TRUE(vixl::w24.Is(Arm64Assembler::reg_w(W24)));
-  EXPECT_TRUE(vixl::w25.Is(Arm64Assembler::reg_w(W25)));
-  EXPECT_TRUE(vixl::w26.Is(Arm64Assembler::reg_w(W26)));
-  EXPECT_TRUE(vixl::w27.Is(Arm64Assembler::reg_w(W27)));
-  EXPECT_TRUE(vixl::w28.Is(Arm64Assembler::reg_w(W28)));
-  EXPECT_TRUE(vixl::w29.Is(Arm64Assembler::reg_w(W29)));
-  EXPECT_TRUE(vixl::w30.Is(Arm64Assembler::reg_w(W30)));
-  EXPECT_TRUE(vixl::w31.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wzr.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wsp.Is(Arm64Assembler::reg_w(WSP)));
+  EXPECT_TRUE(vixl::aarch64::w0.Is(Arm64Assembler::reg_w(W0)));
+  EXPECT_TRUE(vixl::aarch64::w1.Is(Arm64Assembler::reg_w(W1)));
+  EXPECT_TRUE(vixl::aarch64::w2.Is(Arm64Assembler::reg_w(W2)));
+  EXPECT_TRUE(vixl::aarch64::w3.Is(Arm64Assembler::reg_w(W3)));
+  EXPECT_TRUE(vixl::aarch64::w4.Is(Arm64Assembler::reg_w(W4)));
+  EXPECT_TRUE(vixl::aarch64::w5.Is(Arm64Assembler::reg_w(W5)));
+  EXPECT_TRUE(vixl::aarch64::w6.Is(Arm64Assembler::reg_w(W6)));
+  EXPECT_TRUE(vixl::aarch64::w7.Is(Arm64Assembler::reg_w(W7)));
+  EXPECT_TRUE(vixl::aarch64::w8.Is(Arm64Assembler::reg_w(W8)));
+  EXPECT_TRUE(vixl::aarch64::w9.Is(Arm64Assembler::reg_w(W9)));
+  EXPECT_TRUE(vixl::aarch64::w10.Is(Arm64Assembler::reg_w(W10)));
+  EXPECT_TRUE(vixl::aarch64::w11.Is(Arm64Assembler::reg_w(W11)));
+  EXPECT_TRUE(vixl::aarch64::w12.Is(Arm64Assembler::reg_w(W12)));
+  EXPECT_TRUE(vixl::aarch64::w13.Is(Arm64Assembler::reg_w(W13)));
+  EXPECT_TRUE(vixl::aarch64::w14.Is(Arm64Assembler::reg_w(W14)));
+  EXPECT_TRUE(vixl::aarch64::w15.Is(Arm64Assembler::reg_w(W15)));
+  EXPECT_TRUE(vixl::aarch64::w16.Is(Arm64Assembler::reg_w(W16)));
+  EXPECT_TRUE(vixl::aarch64::w17.Is(Arm64Assembler::reg_w(W17)));
+  EXPECT_TRUE(vixl::aarch64::w18.Is(Arm64Assembler::reg_w(W18)));
+  EXPECT_TRUE(vixl::aarch64::w19.Is(Arm64Assembler::reg_w(W19)));
+  EXPECT_TRUE(vixl::aarch64::w20.Is(Arm64Assembler::reg_w(W20)));
+  EXPECT_TRUE(vixl::aarch64::w21.Is(Arm64Assembler::reg_w(W21)));
+  EXPECT_TRUE(vixl::aarch64::w22.Is(Arm64Assembler::reg_w(W22)));
+  EXPECT_TRUE(vixl::aarch64::w23.Is(Arm64Assembler::reg_w(W23)));
+  EXPECT_TRUE(vixl::aarch64::w24.Is(Arm64Assembler::reg_w(W24)));
+  EXPECT_TRUE(vixl::aarch64::w25.Is(Arm64Assembler::reg_w(W25)));
+  EXPECT_TRUE(vixl::aarch64::w26.Is(Arm64Assembler::reg_w(W26)));
+  EXPECT_TRUE(vixl::aarch64::w27.Is(Arm64Assembler::reg_w(W27)));
+  EXPECT_TRUE(vixl::aarch64::w28.Is(Arm64Assembler::reg_w(W28)));
+  EXPECT_TRUE(vixl::aarch64::w29.Is(Arm64Assembler::reg_w(W29)));
+  EXPECT_TRUE(vixl::aarch64::w30.Is(Arm64Assembler::reg_w(W30)));
+  EXPECT_TRUE(vixl::aarch64::w31.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wzr.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wsp.Is(Arm64Assembler::reg_w(WSP)));
 
   // D Registers.
-  EXPECT_TRUE(vixl::d0.Is(Arm64Assembler::reg_d(D0)));
-  EXPECT_TRUE(vixl::d1.Is(Arm64Assembler::reg_d(D1)));
-  EXPECT_TRUE(vixl::d2.Is(Arm64Assembler::reg_d(D2)));
-  EXPECT_TRUE(vixl::d3.Is(Arm64Assembler::reg_d(D3)));
-  EXPECT_TRUE(vixl::d4.Is(Arm64Assembler::reg_d(D4)));
-  EXPECT_TRUE(vixl::d5.Is(Arm64Assembler::reg_d(D5)));
-  EXPECT_TRUE(vixl::d6.Is(Arm64Assembler::reg_d(D6)));
-  EXPECT_TRUE(vixl::d7.Is(Arm64Assembler::reg_d(D7)));
-  EXPECT_TRUE(vixl::d8.Is(Arm64Assembler::reg_d(D8)));
-  EXPECT_TRUE(vixl::d9.Is(Arm64Assembler::reg_d(D9)));
-  EXPECT_TRUE(vixl::d10.Is(Arm64Assembler::reg_d(D10)));
-  EXPECT_TRUE(vixl::d11.Is(Arm64Assembler::reg_d(D11)));
-  EXPECT_TRUE(vixl::d12.Is(Arm64Assembler::reg_d(D12)));
-  EXPECT_TRUE(vixl::d13.Is(Arm64Assembler::reg_d(D13)));
-  EXPECT_TRUE(vixl::d14.Is(Arm64Assembler::reg_d(D14)));
-  EXPECT_TRUE(vixl::d15.Is(Arm64Assembler::reg_d(D15)));
-  EXPECT_TRUE(vixl::d16.Is(Arm64Assembler::reg_d(D16)));
-  EXPECT_TRUE(vixl::d17.Is(Arm64Assembler::reg_d(D17)));
-  EXPECT_TRUE(vixl::d18.Is(Arm64Assembler::reg_d(D18)));
-  EXPECT_TRUE(vixl::d19.Is(Arm64Assembler::reg_d(D19)));
-  EXPECT_TRUE(vixl::d20.Is(Arm64Assembler::reg_d(D20)));
-  EXPECT_TRUE(vixl::d21.Is(Arm64Assembler::reg_d(D21)));
-  EXPECT_TRUE(vixl::d22.Is(Arm64Assembler::reg_d(D22)));
-  EXPECT_TRUE(vixl::d23.Is(Arm64Assembler::reg_d(D23)));
-  EXPECT_TRUE(vixl::d24.Is(Arm64Assembler::reg_d(D24)));
-  EXPECT_TRUE(vixl::d25.Is(Arm64Assembler::reg_d(D25)));
-  EXPECT_TRUE(vixl::d26.Is(Arm64Assembler::reg_d(D26)));
-  EXPECT_TRUE(vixl::d27.Is(Arm64Assembler::reg_d(D27)));
-  EXPECT_TRUE(vixl::d28.Is(Arm64Assembler::reg_d(D28)));
-  EXPECT_TRUE(vixl::d29.Is(Arm64Assembler::reg_d(D29)));
-  EXPECT_TRUE(vixl::d30.Is(Arm64Assembler::reg_d(D30)));
-  EXPECT_TRUE(vixl::d31.Is(Arm64Assembler::reg_d(D31)));
+  EXPECT_TRUE(vixl::aarch64::d0.Is(Arm64Assembler::reg_d(D0)));
+  EXPECT_TRUE(vixl::aarch64::d1.Is(Arm64Assembler::reg_d(D1)));
+  EXPECT_TRUE(vixl::aarch64::d2.Is(Arm64Assembler::reg_d(D2)));
+  EXPECT_TRUE(vixl::aarch64::d3.Is(Arm64Assembler::reg_d(D3)));
+  EXPECT_TRUE(vixl::aarch64::d4.Is(Arm64Assembler::reg_d(D4)));
+  EXPECT_TRUE(vixl::aarch64::d5.Is(Arm64Assembler::reg_d(D5)));
+  EXPECT_TRUE(vixl::aarch64::d6.Is(Arm64Assembler::reg_d(D6)));
+  EXPECT_TRUE(vixl::aarch64::d7.Is(Arm64Assembler::reg_d(D7)));
+  EXPECT_TRUE(vixl::aarch64::d8.Is(Arm64Assembler::reg_d(D8)));
+  EXPECT_TRUE(vixl::aarch64::d9.Is(Arm64Assembler::reg_d(D9)));
+  EXPECT_TRUE(vixl::aarch64::d10.Is(Arm64Assembler::reg_d(D10)));
+  EXPECT_TRUE(vixl::aarch64::d11.Is(Arm64Assembler::reg_d(D11)));
+  EXPECT_TRUE(vixl::aarch64::d12.Is(Arm64Assembler::reg_d(D12)));
+  EXPECT_TRUE(vixl::aarch64::d13.Is(Arm64Assembler::reg_d(D13)));
+  EXPECT_TRUE(vixl::aarch64::d14.Is(Arm64Assembler::reg_d(D14)));
+  EXPECT_TRUE(vixl::aarch64::d15.Is(Arm64Assembler::reg_d(D15)));
+  EXPECT_TRUE(vixl::aarch64::d16.Is(Arm64Assembler::reg_d(D16)));
+  EXPECT_TRUE(vixl::aarch64::d17.Is(Arm64Assembler::reg_d(D17)));
+  EXPECT_TRUE(vixl::aarch64::d18.Is(Arm64Assembler::reg_d(D18)));
+  EXPECT_TRUE(vixl::aarch64::d19.Is(Arm64Assembler::reg_d(D19)));
+  EXPECT_TRUE(vixl::aarch64::d20.Is(Arm64Assembler::reg_d(D20)));
+  EXPECT_TRUE(vixl::aarch64::d21.Is(Arm64Assembler::reg_d(D21)));
+  EXPECT_TRUE(vixl::aarch64::d22.Is(Arm64Assembler::reg_d(D22)));
+  EXPECT_TRUE(vixl::aarch64::d23.Is(Arm64Assembler::reg_d(D23)));
+  EXPECT_TRUE(vixl::aarch64::d24.Is(Arm64Assembler::reg_d(D24)));
+  EXPECT_TRUE(vixl::aarch64::d25.Is(Arm64Assembler::reg_d(D25)));
+  EXPECT_TRUE(vixl::aarch64::d26.Is(Arm64Assembler::reg_d(D26)));
+  EXPECT_TRUE(vixl::aarch64::d27.Is(Arm64Assembler::reg_d(D27)));
+  EXPECT_TRUE(vixl::aarch64::d28.Is(Arm64Assembler::reg_d(D28)));
+  EXPECT_TRUE(vixl::aarch64::d29.Is(Arm64Assembler::reg_d(D29)));
+  EXPECT_TRUE(vixl::aarch64::d30.Is(Arm64Assembler::reg_d(D30)));
+  EXPECT_TRUE(vixl::aarch64::d31.Is(Arm64Assembler::reg_d(D31)));
 
   // S Registers.
-  EXPECT_TRUE(vixl::s0.Is(Arm64Assembler::reg_s(S0)));
-  EXPECT_TRUE(vixl::s1.Is(Arm64Assembler::reg_s(S1)));
-  EXPECT_TRUE(vixl::s2.Is(Arm64Assembler::reg_s(S2)));
-  EXPECT_TRUE(vixl::s3.Is(Arm64Assembler::reg_s(S3)));
-  EXPECT_TRUE(vixl::s4.Is(Arm64Assembler::reg_s(S4)));
-  EXPECT_TRUE(vixl::s5.Is(Arm64Assembler::reg_s(S5)));
-  EXPECT_TRUE(vixl::s6.Is(Arm64Assembler::reg_s(S6)));
-  EXPECT_TRUE(vixl::s7.Is(Arm64Assembler::reg_s(S7)));
-  EXPECT_TRUE(vixl::s8.Is(Arm64Assembler::reg_s(S8)));
-  EXPECT_TRUE(vixl::s9.Is(Arm64Assembler::reg_s(S9)));
-  EXPECT_TRUE(vixl::s10.Is(Arm64Assembler::reg_s(S10)));
-  EXPECT_TRUE(vixl::s11.Is(Arm64Assembler::reg_s(S11)));
-  EXPECT_TRUE(vixl::s12.Is(Arm64Assembler::reg_s(S12)));
-  EXPECT_TRUE(vixl::s13.Is(Arm64Assembler::reg_s(S13)));
-  EXPECT_TRUE(vixl::s14.Is(Arm64Assembler::reg_s(S14)));
-  EXPECT_TRUE(vixl::s15.Is(Arm64Assembler::reg_s(S15)));
-  EXPECT_TRUE(vixl::s16.Is(Arm64Assembler::reg_s(S16)));
-  EXPECT_TRUE(vixl::s17.Is(Arm64Assembler::reg_s(S17)));
-  EXPECT_TRUE(vixl::s18.Is(Arm64Assembler::reg_s(S18)));
-  EXPECT_TRUE(vixl::s19.Is(Arm64Assembler::reg_s(S19)));
-  EXPECT_TRUE(vixl::s20.Is(Arm64Assembler::reg_s(S20)));
-  EXPECT_TRUE(vixl::s21.Is(Arm64Assembler::reg_s(S21)));
-  EXPECT_TRUE(vixl::s22.Is(Arm64Assembler::reg_s(S22)));
-  EXPECT_TRUE(vixl::s23.Is(Arm64Assembler::reg_s(S23)));
-  EXPECT_TRUE(vixl::s24.Is(Arm64Assembler::reg_s(S24)));
-  EXPECT_TRUE(vixl::s25.Is(Arm64Assembler::reg_s(S25)));
-  EXPECT_TRUE(vixl::s26.Is(Arm64Assembler::reg_s(S26)));
-  EXPECT_TRUE(vixl::s27.Is(Arm64Assembler::reg_s(S27)));
-  EXPECT_TRUE(vixl::s28.Is(Arm64Assembler::reg_s(S28)));
-  EXPECT_TRUE(vixl::s29.Is(Arm64Assembler::reg_s(S29)));
-  EXPECT_TRUE(vixl::s30.Is(Arm64Assembler::reg_s(S30)));
-  EXPECT_TRUE(vixl::s31.Is(Arm64Assembler::reg_s(S31)));
+  EXPECT_TRUE(vixl::aarch64::s0.Is(Arm64Assembler::reg_s(S0)));
+  EXPECT_TRUE(vixl::aarch64::s1.Is(Arm64Assembler::reg_s(S1)));
+  EXPECT_TRUE(vixl::aarch64::s2.Is(Arm64Assembler::reg_s(S2)));
+  EXPECT_TRUE(vixl::aarch64::s3.Is(Arm64Assembler::reg_s(S3)));
+  EXPECT_TRUE(vixl::aarch64::s4.Is(Arm64Assembler::reg_s(S4)));
+  EXPECT_TRUE(vixl::aarch64::s5.Is(Arm64Assembler::reg_s(S5)));
+  EXPECT_TRUE(vixl::aarch64::s6.Is(Arm64Assembler::reg_s(S6)));
+  EXPECT_TRUE(vixl::aarch64::s7.Is(Arm64Assembler::reg_s(S7)));
+  EXPECT_TRUE(vixl::aarch64::s8.Is(Arm64Assembler::reg_s(S8)));
+  EXPECT_TRUE(vixl::aarch64::s9.Is(Arm64Assembler::reg_s(S9)));
+  EXPECT_TRUE(vixl::aarch64::s10.Is(Arm64Assembler::reg_s(S10)));
+  EXPECT_TRUE(vixl::aarch64::s11.Is(Arm64Assembler::reg_s(S11)));
+  EXPECT_TRUE(vixl::aarch64::s12.Is(Arm64Assembler::reg_s(S12)));
+  EXPECT_TRUE(vixl::aarch64::s13.Is(Arm64Assembler::reg_s(S13)));
+  EXPECT_TRUE(vixl::aarch64::s14.Is(Arm64Assembler::reg_s(S14)));
+  EXPECT_TRUE(vixl::aarch64::s15.Is(Arm64Assembler::reg_s(S15)));
+  EXPECT_TRUE(vixl::aarch64::s16.Is(Arm64Assembler::reg_s(S16)));
+  EXPECT_TRUE(vixl::aarch64::s17.Is(Arm64Assembler::reg_s(S17)));
+  EXPECT_TRUE(vixl::aarch64::s18.Is(Arm64Assembler::reg_s(S18)));
+  EXPECT_TRUE(vixl::aarch64::s19.Is(Arm64Assembler::reg_s(S19)));
+  EXPECT_TRUE(vixl::aarch64::s20.Is(Arm64Assembler::reg_s(S20)));
+  EXPECT_TRUE(vixl::aarch64::s21.Is(Arm64Assembler::reg_s(S21)));
+  EXPECT_TRUE(vixl::aarch64::s22.Is(Arm64Assembler::reg_s(S22)));
+  EXPECT_TRUE(vixl::aarch64::s23.Is(Arm64Assembler::reg_s(S23)));
+  EXPECT_TRUE(vixl::aarch64::s24.Is(Arm64Assembler::reg_s(S24)));
+  EXPECT_TRUE(vixl::aarch64::s25.Is(Arm64Assembler::reg_s(S25)));
+  EXPECT_TRUE(vixl::aarch64::s26.Is(Arm64Assembler::reg_s(S26)));
+  EXPECT_TRUE(vixl::aarch64::s27.Is(Arm64Assembler::reg_s(S27)));
+  EXPECT_TRUE(vixl::aarch64::s28.Is(Arm64Assembler::reg_s(S28)));
+  EXPECT_TRUE(vixl::aarch64::s29.Is(Arm64Assembler::reg_s(S29)));
+  EXPECT_TRUE(vixl::aarch64::s30.Is(Arm64Assembler::reg_s(S30)));
+  EXPECT_TRUE(vixl::aarch64::s31.Is(Arm64Assembler::reg_s(S31)));
 }
 
 }  // namespace arm64
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 5c33639..8dc9ab4 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -39,9 +39,6 @@
  */
 template <typename T>
 class ArrayRef {
- private:
-  struct tag { };
-
  public:
   typedef T value_type;
   typedef T& reference;
@@ -63,14 +60,14 @@
 
   template <size_t size>
   explicit constexpr ArrayRef(T (&array)[size])
-    : array_(array), size_(size) {
+      : array_(array), size_(size) {
   }
 
-  template <typename U, size_t size>
-  explicit constexpr ArrayRef(U (&array)[size],
-                              typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                                  t ATTRIBUTE_UNUSED = tag())
-    : array_(array), size_(size) {
+  template <typename U,
+            size_t size,
+            typename = typename std::enable_if<std::is_same<T, const U>::value>::type>
+  explicit constexpr ArrayRef(U (&array)[size])
+      : array_(array), size_(size) {
   }
 
   constexpr ArrayRef(T* array_in, size_t size_in)
@@ -165,13 +162,21 @@
   value_type* data() { return array_; }
   const value_type* data() const { return array_; }
 
-  ArrayRef SubArray(size_type pos) const {
-    return SubArray(pos, size_ - pos);
+  ArrayRef SubArray(size_type pos) {
+    return SubArray(pos, size() - pos);
   }
-  ArrayRef SubArray(size_type pos, size_type length) const {
+  ArrayRef<const T> SubArray(size_type pos) const {
+    return SubArray(pos, size() - pos);
+  }
+  ArrayRef SubArray(size_type pos, size_type length) {
     DCHECK_LE(pos, size());
     DCHECK_LE(length, size() - pos);
-    return ArrayRef(array_ + pos, length);
+    return ArrayRef(data() + pos, length);
+  }
+  ArrayRef<const T> SubArray(size_type pos, size_type length) const {
+    DCHECK_LE(pos, size());
+    DCHECK_LE(length, size() - pos);
+    return ArrayRef<const T>(data() + pos, length);
   }
 
  private:
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index e6c3a18..81159e6 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -121,133 +121,4 @@
   }
 }
 
-std::unique_ptr<Assembler> Assembler::Create(
-    ArenaAllocator* arena,
-    InstructionSet instruction_set,
-    const InstructionSetFeatures* instruction_set_features) {
-  switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
-    case kArm:
-      return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena));
-    case kThumb2:
-      return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-    case kArm64:
-      return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler(
-          arena,
-          instruction_set_features != nullptr
-              ? instruction_set_features->AsMipsInstructionSetFeatures()
-              : nullptr));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips64
-    case kMips64:
-      return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86
-    case kX86:
-      return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86_64
-    case kX86_64:
-      return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena));
-#endif
-    default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
-  }
-}
-
-void Assembler::StoreImmediateToThread32(ThreadOffset<4> dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreImmediateToThread64(ThreadOffset<8> dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                           FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                           ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                           FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                           ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset<4> src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset<8> src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset<4> offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset<8> offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread32(ThreadOffset<4> offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread64(ThreadOffset<8> offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
 }  // namespace art
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 96da03d..8981776 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -24,6 +24,7 @@
 #include "arm/constants_arm.h"
 #include "base/arena_allocator.h"
 #include "base/arena_object.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "debug/dwarf/debug_frame_opcode_writer.h"
@@ -32,6 +33,7 @@
 #include "memory_region.h"
 #include "mips/constants_mips.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
 
@@ -311,8 +313,10 @@
   // Override the last delayed PC. The new PC can be out of order.
   void OverrideDelayedPC(size_t pc) {
     DCHECK(delay_emitting_advance_pc_);
-    DCHECK(!delayed_advance_pcs_.empty());
-    delayed_advance_pcs_.back().pc = pc;
+    if (enabled_) {
+      DCHECK(!delayed_advance_pcs_.empty());
+      delayed_advance_pcs_.back().pc = pc;
+    }
   }
 
   // Return the number of delayed advance PC entries.
@@ -352,11 +356,6 @@
 
 class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static std::unique_ptr<Assembler> Create(
-      ArenaAllocator* arena,
-      InstructionSet instruction_set,
-      const InstructionSetFeatures* instruction_set_features = nullptr);
-
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
 
@@ -372,140 +371,6 @@
   // TODO: Implement with disassembler.
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
-  // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
-
-  // Emit code that will remove an activation from the stack
-  virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& callee_save_regs) = 0;
-
-  virtual void IncreaseFrameSize(size_t adjust) = 0;
-  virtual void DecreaseFrameSize(size_t adjust) = 0;
-
-  // Store routines
-  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
-  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
-  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
-
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                     ManagedRegister scratch) = 0;
-
-  virtual void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                        ManagedRegister scratch);
-  virtual void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                        ManagedRegister scratch);
-
-  virtual void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-  virtual void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-
-  virtual void StoreStackPointerToThread32(ThreadOffset<4> thr_offs);
-  virtual void StoreStackPointerToThread64(ThreadOffset<8> thr_offs);
-
-  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
-                             FrameOffset in_off, ManagedRegister scratch) = 0;
-
-  // Load routines
-  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
-
-  virtual void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size);
-  virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size);
-
-  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
-  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
-  virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-                       bool unpoison_reference) = 0;
-
-  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
-
-  virtual void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs);
-  virtual void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs);
-
-  // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
-
-  virtual void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                                      ManagedRegister scratch);
-  virtual void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                                      ManagedRegister scratch);
-
-  virtual void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-  virtual void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-
-  virtual void CopyRef(FrameOffset dest, FrameOffset src,
-                       ManagedRegister scratch) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest, Offset dest_offset,
-                    ManagedRegister src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
-
-  // Sign extension
-  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Zero extension
-  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset,
-                                ManagedRegister scratch) = 0;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  virtual void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                               ManagedRegister in_reg, bool null_allowed) = 0;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  virtual void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                               ManagedRegister scratch, bool null_allowed) = 0;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
-                                     ManagedRegister src) = 0;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
-  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
-
-  // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset,
-                    ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset,
-                    ManagedRegister scratch) = 0;
-  virtual void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch);
-  virtual void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch);
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
-
   virtual void Bind(Label* label) = 0;
   virtual void Jump(Label* label) = 0;
 
@@ -517,13 +382,17 @@
    */
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
- protected:
-  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
-
   ArenaAllocator* GetArena() {
     return buffer_.GetArena();
   }
 
+  AssemblerBuffer* GetBuffer() {
+    return &buffer_;
+  }
+
+ protected:
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
+
   AssemblerBuffer buffer_;
 
   DebugFrameOpCodeWriterForAssembler cfi_;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 084e901..92b4c8e 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -344,6 +344,17 @@
   }
 
   template <typename ImmType>
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
+                                                                  imm_bits,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  fmt);
+  }
+
+  template <typename ImmType>
   std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
     return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
                                                                   GetFPRegisters(),
@@ -461,7 +472,7 @@
 
   void SetUp() OVERRIDE {
     arena_.reset(new ArenaAllocator(&pool_));
-    assembler_.reset(new (arena_.get()) Ass(arena_.get()));
+    assembler_.reset(CreateAssembler(arena_.get()));
     test_helper_.reset(
         new AssemblerTestInfrastructure(GetArchitectureString(),
                                         GetAssemblerCmdName(),
@@ -481,6 +492,11 @@
     arena_.reset();
   }
 
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
+  }
+
   // Override this to set up any architecture-specific things, e.g., register vectors.
   virtual void SetUpHelpers() {}
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index c67cb5a..9c9271d 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -32,7 +32,7 @@
 // Include results file (generated manually)
 #include "assembler_thumb_test_expected.cc.inc"
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 // This controls whether the results are printed to the
 // screen or compared against the expected output.
 // To generate new expected output, set this to true and
@@ -72,7 +72,7 @@
 }
 
 std::string GetToolsDir() {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   // This will only work on the host.  There is no as, objcopy or objdump on the device.
   static std::string toolsdir;
 
@@ -89,7 +89,7 @@
 }
 
 void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   static std::string toolsdir = GetToolsDir();
 
   ScratchFile file;
@@ -169,7 +169,7 @@
 
   snprintf(buf, sizeof(buf), "%s.oo", filename);
   unlink(buf);
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 #define __ assembler->
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
new file mode 100644
index 0000000..1b74313
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler.h"
+
+#include <algorithm>
+#include <vector>
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "arm/jni_macro_assembler_arm.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "arm64/jni_macro_assembler_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "mips/assembler_mips.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "mips64/assembler_mips64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "x86/jni_macro_assembler_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "x86_64/jni_macro_assembler_x86_64.h"
+#endif
+#include "base/casts.h"
+#include "globals.h"
+#include "memory_region.h"
+
+namespace art {
+
+using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>;
+
+template <>
+MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips
+  UNUSED(instruction_set_features);
+#endif
+
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kArm:
+    case kThumb2:
+      return MacroAsm32UniquePtr(new (arena) arm::ArmJNIMacroAssembler(arena, instruction_set));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return MacroAsm32UniquePtr(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+    case kX86:
+      return MacroAsm32UniquePtr(new (arena) x86::X86JNIMacroAssembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+using MacroAsm64UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k64>>;
+
+template <>
+MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+    case kArm64:
+      return MacroAsm64UniquePtr(new (arena) arm64::Arm64JNIMacroAssembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+    case kMips64:
+      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64:
+      return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena));
+#endif
+    default:
+      UNUSED(arena);
+      LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
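A short usage sketch for the new factory (call sites are not part of this file; the instruction set, frame size and register values are illustrative and assume an ARM/Thumb-2 build with that backend enabled):

    ArenaPool pool;
    ArenaAllocator arena(&pool);
    // 32-bit targets go through the PointerSize::k32 specialization; Create() picks the backend.
    std::unique_ptr<JNIMacroAssembler<PointerSize::k32>> jni_asm =
        JNIMacroAssembler<PointerSize::k32>::Create(&arena, kThumb2);
    jni_asm->IncreaseFrameSize(16);
    jni_asm->FinalizeCode();
    std::vector<uint8_t> code(jni_asm->CodeSize());
    MemoryRegion region(code.data(), code.size());
    jni_asm->FinalizeInstructions(region);
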
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
new file mode 100644
index 0000000..6f45bd6
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "managed_register.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+
+namespace art {
+
+class ArenaAllocator;
+class DebugFrameOpCodeWriterForAssembler;
+class InstructionSetFeatures;
+class MemoryRegion;
+
+template <PointerSize kPointerSize>
+class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
+ public:
+  static std::unique_ptr<JNIMacroAssembler<kPointerSize>> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
+
+  // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
+  virtual void FinalizeCode() = 0;
+
+  // Size of generated code
+  virtual size_t CodeSize() const = 0;
+
+  // Copy instructions out of assembly buffer into the given region of memory
+  virtual void FinalizeInstructions(const MemoryRegion& region) = 0;
+
+  // Emit code that will create an activation on the stack
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
+                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+
+  // Emit code that will remove an activation from the stack
+  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
+
+  virtual void IncreaseFrameSize(size_t adjust) = 0;
+  virtual void DecreaseFrameSize(size_t adjust) = 0;
+
+  // Store routines
+  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
+  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
+  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
+
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+
+  virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister scratch) = 0;
+
+  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
+
+  virtual void StoreSpanning(FrameOffset dest,
+                             ManagedRegister src,
+                             FrameOffset in_off,
+                             ManagedRegister scratch) = 0;
+
+  // Load routines
+  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
+
+  virtual void LoadFromThread(ManagedRegister dest,
+                              ThreadOffset<kPointerSize> src,
+                              size_t size) = 0;
+
+  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
+  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
+  virtual void LoadRef(ManagedRegister dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
+
+  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
+
+  virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
+
+  // Copying routines
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
+
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
+                                    ThreadOffset<kPointerSize> thr_offs,
+                                    ManagedRegister scratch) = 0;
+
+  virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
+                                  FrameOffset fr_offs,
+                                  ManagedRegister scratch) = 0;
+
+  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+
+  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    ManagedRegister src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest_base,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    FrameOffset src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest,
+                    Offset dest_offset,
+                    ManagedRegister src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
+
+  // Sign extension
+  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Zero extension
+  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Exploit fast access in managed code to Thread::Current()
+  virtual void GetCurrentThread(ManagedRegister tr) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister in_reg,
+                                      bool null_allowed) = 0;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  virtual void CreateHandleScopeEntry(FrameOffset out_off,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister scratch,
+                                      bool null_allowed) = 0;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src cannot be null.
+  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
+  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
+
+  // Call to address held at [base+offset]
+  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+
+  virtual ~JNIMacroAssembler() {}
+
+  /**
+   * @brief Buffer of DWARF's Call Frame Information opcodes.
+   * @details It is used by debuggers and other tools to unwind the call stack.
+   */
+  virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
+
+ protected:
+  explicit JNIMacroAssembler() {}
+};
+
+template <typename T, PointerSize kPointerSize>
+class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
+ public:
+  void FinalizeCode() OVERRIDE {
+    asm_.FinalizeCode();
+  }
+
+  size_t CodeSize() const OVERRIDE {
+    return asm_.CodeSize();
+  }
+
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    asm_.FinalizeInstructions(region);
+  }
+
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE {
+    return asm_.cfi();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerFwd(ArenaAllocator* arena) : asm_(arena) {}
+
+  T asm_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
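The interface is meant to be programmed against generically: a helper in the JNI compiler can be templated over the pointer size and stay architecture-neutral. An illustrative sketch (not a function from this change):

    template <PointerSize kPointerSize>
    void SpillReference(JNIMacroAssembler<kPointerSize>* jni_asm,
                        ManagedRegister reg,
                        FrameOffset offset) {
      // The concrete backend chosen by Create() decides the actual store instruction.
      jni_asm->StoreRef(offset, reg);
    }

JNIMacroAssemblerFwd then lets each backend reuse its existing Assembler for the buffer, finalization and CFI plumbing, so only the JNI-specific operations above need architecture-specific bodies.
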
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
new file mode 100644
index 0000000..829f34b
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+
+#include "jni_macro_assembler.h"
+
+#include "assembler_test_base.h"
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+template<typename Ass>
+class JNIMacroAssemblerTest : public testing::Test {
+ public:
+  Ass* GetAssembler() {
+    return assembler_.get();
+  }
+
+  typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
+
+  void DriverFn(TestFn f, std::string test_name) {
+    DriverWrapper(f(this, assembler_.get()), test_name);
+  }
+
+  // This driver assumes the assembly has already been emitted into the assembler.
+  void DriverStr(std::string assembly_string, std::string test_name) {
+    DriverWrapper(assembly_string, test_name);
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    return test_helper_->CheckTools();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerTest() {}
+
+  void SetUp() OVERRIDE {
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(CreateAssembler(arena_.get()));
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+
+    SetUpHelpers();
+  }
+
+  void TearDown() OVERRIDE {
+    test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
+  }
+
+  // Override this to set up any architecture-specific things, e.g., register vectors.
+  virtual void SetUpHelpers() {}
+
+  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
+  virtual std::string GetArchitectureString() = 0;
+
+  // Get the name of the assembler, e.g., "as" by default.
+  virtual std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches passed to the assembler command. None by default.
+  virtual std::string GetAssemblerParameters() {
+    return "";
+  }
+
+  // Get the name of the objdump, e.g., "objdump" by default.
+  virtual std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to the objdump command. Default is " -h".
+  virtual std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the disassembler, e.g., "objdump" by default.
+  virtual std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to the disassembler command. Since the input is a raw binary, the
+  // architecture (and similar flags) must be passed to objdump explicitly, so this is
+  // architecture-specific and there is no default.
+  virtual std::string GetDisassembleParameters() = 0;
+
+  // If the assembly file needs a header, return it in a sub-class.
+  virtual const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+ private:
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
+    assembler_->FinalizeCode();
+    size_t cs = assembler_->CodeSize();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    Pad(*data);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
+  std::unique_ptr<Ass> assembler_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+  DISALLOW_COPY_AND_ASSIGN(JNIMacroAssemblerTest);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
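A concrete per-architecture test only has to fill in the pure-virtual knobs; a hypothetical x86-64 fixture could look roughly like this (class name and objdump flags are illustrative, not taken from this change):

    class JNIMacroAssemblerX86_64Test
        : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
     protected:
      std::string GetArchitectureString() OVERRIDE { return "x86_64"; }
      std::string GetDisassembleParameters() OVERRIDE {
        return " -D -bbinary -mi386:x86-64 --no-show-raw-insn";
      }
    };

    // Real tests emit code through GetAssembler() and compare against expected
    // assembly via DriverStr()/DriverFn(); the simplest smoke test just checks
    // that the host toolchain is present:
    TEST_F(JNIMacroAssemblerX86_64Test, Toolchain) {
      EXPECT_TRUE(CheckTools());
    }
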
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 893daff..46adb3f 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -47,40 +47,40 @@
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
 
   ManagedRegister& operator=(const ManagedRegister& other) {
     id_ = other.id_;
     return *this;
   }
 
-  arm::ArmManagedRegister AsArm() const;
-  arm64::Arm64ManagedRegister AsArm64() const;
-  mips::MipsManagedRegister AsMips() const;
-  mips64::Mips64ManagedRegister AsMips64() const;
-  x86::X86ManagedRegister AsX86() const;
-  x86_64::X86_64ManagedRegister AsX86_64() const;
+  constexpr arm::ArmManagedRegister AsArm() const;
+  constexpr arm64::Arm64ManagedRegister AsArm64() const;
+  constexpr mips::MipsManagedRegister AsMips() const;
+  constexpr mips64::Mips64ManagedRegister AsMips64() const;
+  constexpr x86::X86ManagedRegister AsX86() const;
+  constexpr x86_64::X86_64ManagedRegister AsX86_64() const;
 
   // It is valid to invoke Equals on and with a NoRegister.
-  bool Equals(const ManagedRegister& other) const {
+  constexpr bool Equals(const ManagedRegister& other) const {
     return id_ == other.id_;
   }
 
-  bool IsNoRegister() const {
+  constexpr bool IsNoRegister() const {
     return id_ == kNoRegister;
   }
 
-  static ManagedRegister NoRegister() {
+  static constexpr ManagedRegister NoRegister() {
     return ManagedRegister();
   }
 
-  int RegId() const { return id_; }
-  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
+  constexpr int RegId() const { return id_; }
+  explicit constexpr ManagedRegister(int reg_id) : id_(reg_id) { }
 
  protected:
   static const int kNoRegister = -1;
 
-  ManagedRegister() : id_(kNoRegister) { }
+  constexpr ManagedRegister() : id_(kNoRegister) { }
 
   int id_;
 };
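Making the value class constexpr-friendly lets register constants be formed and validated at compile time, e.g. (illustrative):

    // Both the construction and the query now fold into a compile-time constant.
    static_assert(ManagedRegister::NoRegister().IsNoRegister(),
                  "NoRegister() must compare as a no-register value");
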
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a1798c0..8b7da3f 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -26,6 +26,11 @@
 namespace art {
 namespace mips {
 
+static_assert(static_cast<size_t>(kMipsPointerSize) == kMipsWordSize,
+              "Unexpected Mips pointer size.");
+static_assert(kMipsPointerSize == PointerSize::k32, "Unexpected Mips pointer size.");
+
+
 std::ostream& operator<<(std::ostream& os, const DRegister& rhs) {
   if (rhs >= D0 && rhs < kNumberOfDRegisters) {
     os << "d" << static_cast<int>(rhs);
@@ -39,6 +44,7 @@
   for (auto& exception_block : exception_blocks_) {
     EmitExceptionPoll(&exception_block);
   }
+  EmitLiterals();
   PromoteBranches();
 }
 
@@ -444,10 +450,21 @@
   EmitI(0x25, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
+  CHECK(IsR6());
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+}
+
 void MipsAssembler::Lui(Register rt, uint16_t imm16) {
   EmitI(0xf, static_cast<Register>(0), rt, imm16);
 }
 
+void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0xf, rs, rt, imm16);
+}
+
 void MipsAssembler::Sync(uint32_t stype) {
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0),
         stype & 0x1f, 0xf);
@@ -527,6 +544,10 @@
   EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bal(uint16_t imm16) {
+  EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16);
+}
+
 void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
 }
@@ -619,6 +640,11 @@
   EmitI26(0x32, imm26);
 }
 
+void MipsAssembler::Balc(uint32_t imm26) {
+  CHECK(IsR6());
+  EmitI26(0x3A, imm26);
+}
+
 void MipsAssembler::Jic(Register rt, uint16_t imm16) {
   CHECK(IsR6());
   EmitI(0x36, static_cast<Register>(0), rt, imm16);
@@ -1385,13 +1411,8 @@
                                          Register base,
                                          int32_t offset,
                                          Register temp) {
-  if (!IsInt<16>(offset)) {
-    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
+  CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false);
   if (value == 0) {
     temp = ZERO;
   } else {
@@ -1404,14 +1425,8 @@
                                          Register base,
                                          int32_t offset,
                                          Register temp) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) || !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))) {
-    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
+  CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true);
   uint32_t low = Low32Bits(value);
   uint32_t high = High32Bits(value);
   if (low == 0) {
@@ -1457,11 +1472,35 @@
 }
 
 void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
+  CHECK_NE(rs, temp);  // Must not overwrite the register `rs` while loading `value`.
   if (IsInt<16>(value)) {
     Addiu(rt, rs, value);
+  } else if (IsR6()) {
+    int16_t high = High16Bits(value);
+    int16_t low = Low16Bits(value);
+    high += (low < 0) ? 1 : 0;  // Account for sign extension in addiu.
+    if (low != 0) {
+      Aui(temp, rs, high);
+      Addiu(rt, temp, low);
+    } else {
+      Aui(rt, rs, high);
+    }
   } else {
-    LoadConst32(temp, value);
-    Addu(rt, rs, temp);
+    // Do not load the whole 32-bit `value` if it can be represented as
+    // a sum of two 16-bit signed values. This can save an instruction.
+    constexpr int32_t kMinValueForSimpleAdjustment = std::numeric_limits<int16_t>::min() * 2;
+    constexpr int32_t kMaxValueForSimpleAdjustment = std::numeric_limits<int16_t>::max() * 2;
+    if (0 <= value && value <= kMaxValueForSimpleAdjustment) {
+      Addiu(temp, rs, kMaxValueForSimpleAdjustment / 2);
+      Addiu(rt, temp, value - kMaxValueForSimpleAdjustment / 2);
+    } else if (kMinValueForSimpleAdjustment <= value && value < 0) {
+      Addiu(temp, rs, kMinValueForSimpleAdjustment / 2);
+      Addiu(rt, temp, value - kMinValueForSimpleAdjustment / 2);
+    } else {
+      // Now that all shorter options have been exhausted, load the full 32-bit value.
+      LoadConst32(temp, value);
+      Addu(rt, rs, temp);
+    }
   }
 }
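Two concrete encodings of the shorter sequences (values chosen purely for illustration):

    Non-R6, value = 50000:
      addiu temp, rs, 32767
      addiu rt,   temp, 17233
    R6, value = 0x12345678:
      aui   temp, rs, 0x1234
      addiu rt,   temp, 0x5678

Both take two instructions instead of the three needed to materialize the constant with LoadConst32 and add it with Addu.
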
 
@@ -1471,30 +1510,47 @@
   type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
 }
 
-void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) {
+  CHECK_EQ(is_call && is_literal, false);
   OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
   if (is_r6) {
     // R6
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kR6Literal;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kR6Call, kR6LongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
     } else {
-      if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
-        // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-        type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
-      } else {
-        InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+          break;
+        case kCondEQZ:
+        case kCondNEZ:
+          // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+          type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+          break;
+        default:
+          InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+          break;
       }
     }
   } else {
     // R2
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kLiteral;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kCall, kLongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
     } else {
-      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+          break;
+        default:
+          InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+          break;
+      }
     }
   }
   old_type_ = type_;
@@ -1526,14 +1582,14 @@
   }
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
     : old_location_(location),
       location_(location),
       target_(target),
       lhs_reg_(0),
       rhs_reg_(0),
       condition_(kUncond) {
-  InitializeType(false, is_r6);
+  InitializeType(is_call, /* is_literal */ false, is_r6);
 }
 
 MipsAssembler::Branch::Branch(bool is_r6,
@@ -1590,19 +1646,23 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(false, is_r6);
+  InitializeType(/* is_call */ false, /* is_literal */ false, is_r6);
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg)
     : old_location_(location),
       location_(location),
-      target_(target),
-      lhs_reg_(indirect_reg),
-      rhs_reg_(0),
+      target_(kUnresolved),
+      lhs_reg_(dest_reg),
+      rhs_reg_(base_reg),
       condition_(kUncond) {
-  CHECK_NE(indirect_reg, ZERO);
-  CHECK_NE(indirect_reg, AT);
-  InitializeType(true, is_r6);
+  CHECK_NE(dest_reg, ZERO);
+  if (is_r6) {
+    CHECK_EQ(base_reg, ZERO);
+  } else {
+    CHECK_NE(base_reg, ZERO);
+  }
+  InitializeType(/* is_call */ false, /* is_literal */ true, is_r6);
 }
 
 MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
@@ -1704,19 +1764,27 @@
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // R2 near literal.
+    case kLiteral:
     // R6 short branches.
     case kR6UncondBranch:
     case kR6CondBranch:
     case kR6Call:
+    // R6 near literal.
+    case kR6Literal:
       return false;
     // R2 long branches.
     case kLongUncondBranch:
     case kLongCondBranch:
     case kLongCall:
+    // R2 far literal.
+    case kFarLiteral:
     // R6 long branches.
     case kR6LongUncondBranch:
     case kR6LongCondBranch:
     case kR6LongCall:
+    // R6 far literal.
+    case kR6FarLiteral:
       return true;
   }
   UNREACHABLE();
@@ -1785,6 +1853,10 @@
     case kCall:
       type_ = kLongCall;
       break;
+    // R2 near literal.
+    case kLiteral:
+      type_ = kFarLiteral;
+      break;
     // R6 short branches.
     case kR6UncondBranch:
       type_ = kR6LongUncondBranch;
@@ -1795,6 +1867,10 @@
     case kR6Call:
       type_ = kR6LongCall;
       break;
+    // R6 near literal.
+    case kR6Literal:
+      type_ = kR6FarLiteral;
+      break;
     default:
       // Note: 'type_' is already long.
       break;
@@ -1802,14 +1878,26 @@
   CHECK(IsLong());
 }
 
-uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetLocation();
+  }
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetLocation()` for everything else.
   // If the branch is still unresolved or already long, nothing to do.
   if (IsLong() || !IsResolved()) {
     return 0;
   }
   // Promote the short branch to long if the offset size is too small
-  // to hold the distance between location_ and target_.
-  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+  // to hold the distance between location and target_.
+  if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) {
     PromoteToLong();
     uint32_t old_size = GetOldSize();
     uint32_t new_size = GetSize();
@@ -1819,7 +1907,7 @@
   // The following logic is for debugging/testing purposes.
   // Promote some short branches to long when it's not really required.
   if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
-    int64_t distance = static_cast<int64_t>(target_) - location_;
+    int64_t distance = static_cast<int64_t>(target_) - location;
     distance = (distance >= 0) ? distance : -distance;
     if (distance >= max_short_distance) {
       PromoteToLong();
@@ -1836,12 +1924,26 @@
   return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
 }
 
-uint32_t MipsAssembler::Branch::GetOffset() const {
+uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetOffsetLocation() +
+          Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
+  }
+}
+
+uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
+  // for everything else.
   CHECK(IsResolved());
   uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
   // Calculate the byte distance between instructions and also account for
   // different PC-relative origins.
-  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  uint32_t offset = target_ - location;
   // Prepare the offset for encoding into the instruction(s).
   offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
   return offset;
@@ -1888,7 +1990,7 @@
   label->BindTo(bound_pc);
 }
 
-uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
   if (label->prev_branch_id_plus_one_) {
@@ -1923,6 +2025,14 @@
   return old_position + last_position_adjustment_;
 }
 
+void MipsAssembler::BindPcRelBaseLabel() {
+  Bind(&pc_rel_base_label_);
+}
+
+uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const {
+  return GetLabelLocation(&pc_rel_base_label_);
+}
+
 void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
   uint32_t length = branches_.back().GetLength();
   if (!label->IsBound()) {
@@ -1944,7 +2054,7 @@
 
 void MipsAssembler::Buncond(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
   FinalizeLabeledBranch(label);
 }
 
@@ -1958,12 +2068,46 @@
   FinalizeLabeledBranch(label);
 }
 
-void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+void MipsAssembler::Call(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
   FinalizeLabeledBranch(label);
 }
 
+Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) {
+  DCHECK(size == 4u || size == 8u) << size;
+  literals_.emplace_back(size, data);
+  return &literals_.back();
+}
+
+void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) {
+  // Literal loads are treated as pseudo branches since they require very similar handling.
+  DCHECK_EQ(literal->GetSize(), 4u);
+  MipsLabel* label = literal->GetLabel();
+  DCHECK(!label->IsBound());
+  branches_.emplace_back(IsR6(),
+                         buffer_.Size(),
+                         dest_reg,
+                         base_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::EmitLiterals() {
+  if (!literals_.empty()) {
+    // We don't support byte and half-word literals.
+    // TODO: proper alignment for 64-bit literals when they're implemented.
+    for (Literal& literal : literals_) {
+      MipsLabel* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+}
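A sketch of how the literal support is meant to be used (register choice is illustrative; per the Branch constructor above, R2 literal loads need a non-ZERO base register holding the location bound by BindPcRelBaseLabel(), while R6 must pass ZERO because lwpc is genuinely PC-relative):

    int32_t value = 0x12345678;
    Literal* literal = assembler->NewLiteral(sizeof(value),
                                             reinterpret_cast<const uint8_t*>(&value));
    // R6: no base register, the load becomes lwpc (or auipc + lw when far).
    assembler->LoadLiteral(V0, ZERO, literal);
    // FinalizeCode() then calls EmitLiterals() to dump the pool and PromoteBranches()
    // to upgrade any literal loads whose offsets outgrew the near encodings.
    assembler->FinalizeCode();
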
+
 void MipsAssembler::PromoteBranches() {
   // Promote short branches to long as necessary.
   bool changed;
@@ -1971,7 +2115,8 @@
     changed = false;
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
-      uint32_t delta = branch.PromoteIfNeeded();
+      uint32_t base = GetBranchLocationOrPcRelBase(&branch);
+      uint32_t delta = branch.PromoteIfNeeded(base);
       // If this branch has been promoted and needs to expand in size,
       // relocate all branches by the expansion size.
       if (delta) {
@@ -2009,27 +2154,35 @@
   // R2 short branches.
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
-  {  5, 2, 0, MipsAssembler::Branch::kOffset16, 0 },  // kCall
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCall
+  // R2 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset16, 0 },  // kLiteral
   // R2 long branches.
   {  9, 3, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongUncondBranch
   { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCondBranch
   {  6, 1, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCall
+  // R2 far literal.
+  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kFarLiteral
   // R6 short branches.
   {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
                                                       // Exception: kOffset23 for beqzc/bnezc.
-  {  2, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Call
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6Call
+  // R6 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Literal
   // R6 long branches.
   {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongUncondBranch
   {  3, 1, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCondBranch
-  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  // R6 far literal.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6FarLiteral
 };
 
-// Note: make sure branch_info_[] and mitBranch() are kept synchronized.
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
   CHECK_EQ(overwriting_, true);
   overwrite_location_ = branch->GetLocation();
-  uint32_t offset = branch->GetOffset();
+  uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
   BranchCondition condition = branch->GetCondition();
   Register lhs = branch->GetLeftRegister();
   Register rhs = branch->GetRightRegister();
@@ -2046,12 +2199,15 @@
       Nop();  // TODO: improve by filling the delay slot.
       break;
     case Branch::kCall:
-      Nal();
-      Nop();  // TODO: is this NOP really needed here?
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiu(lhs, RA, offset);
-      Jalr(lhs);
-      Nop();
+      Bal(offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+
+    // R2 near literal.
+    case Branch::kLiteral:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lw(lhs, rhs, offset);
       break;
 
     // R2 long branches.
@@ -2105,11 +2261,20 @@
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Lui(AT, High16Bits(offset));
       Ori(AT, AT, Low16Bits(offset));
-      Addu(lhs, AT, RA);
-      Jalr(lhs);
+      Addu(AT, AT, RA);
+      Jalr(AT);
       Nop();
       break;
 
+    // R2 far literal.
+    case Branch::kFarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Addu(AT, AT, rhs);
+      Lw(lhs, AT, Low16Bits(offset));
+      break;
+
     // R6 short branches.
     case Branch::kR6UncondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -2122,8 +2287,13 @@
       break;
     case Branch::kR6Call:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiupc(lhs, offset);
-      Jialc(lhs, 0);
+      Balc(offset);
+      break;
+
+    // R6 near literal.
+    case Branch::kR6Literal:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lwpc(lhs, offset);
       break;
 
     // R6 long branches.
@@ -2141,11 +2311,18 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCall:
-      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jialc.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Auipc(lhs, High16Bits(offset));
-      Addiu(lhs, lhs, Low16Bits(offset));
-      Jialc(lhs, 0);
+      Auipc(AT, High16Bits(offset));
+      Jialc(AT, Low16Bits(offset));
+      break;
+
+    // R6 far literal.
+    case Branch::kR6FarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Lw(lhs, AT, Low16Bits(offset));
       break;
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
@@ -2156,8 +2333,8 @@
   Buncond(label);
 }
 
-void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
-  Call(label, indirect_reg);
+void MipsAssembler::Bal(MipsLabel* label) {
+  Call(label);
 }
 
 void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
@@ -2262,17 +2439,103 @@
   Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
 }
 
-void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
-                                   int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
+void MipsAssembler::AdjustBaseAndOffset(Register& base,
+                                        int32_t& offset,
+                                        bool is_doubleword,
+                                        bool is_float) {
+  // This method is used to adjust the base register and offset pair
+  // for a load/store when the offset doesn't fit into int16_t.
+  // It is assumed that `base + offset` is sufficiently aligned for memory
+  // operands that are machine word in size or smaller. For doubleword-sized
+  // operands it's assumed that `base` is a multiple of 8, while `offset`
+  // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments
+  // and spilled variables on the stack accessed relative to the stack
+  // pointer register).
+  // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8.
+  CHECK_NE(base, AT);  // Must not overwrite the register `base` while loading `offset`.
+
+  bool doubleword_aligned = IsAligned<kMipsDoublewordSize>(offset);
+  bool two_accesses = is_doubleword && (!is_float || !doubleword_aligned);
+
+  // IsInt<16> must be passed a signed value, hence the static cast below.
+  if (IsInt<16>(offset) &&
+      (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t.
+    return;
   }
 
+  // Remember the "(mis)alignment" of `offset`, it will be checked at the end.
+  uint32_t misalignment = offset & (kMipsDoublewordSize - 1);
+
+  // Do not load the whole 32-bit `offset` if it can be represented as
+  // a sum of two 16-bit signed offsets. This can save an instruction or two.
+  // To simplify matters, only do this for a symmetric range of offsets from
+  // about -64KB to about +64KB, allowing further addition of 4 when accessing
+  // 64-bit variables with two 32-bit accesses.
+  constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8;  // Max int16_t that's a multiple of 8.
+  constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment;
+  if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+    Addiu(AT, base, kMinOffsetForSimpleAdjustment);
+    offset -= kMinOffsetForSimpleAdjustment;
+  } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+    Addiu(AT, base, -kMinOffsetForSimpleAdjustment);
+    offset += kMinOffsetForSimpleAdjustment;
+  } else if (IsR6()) {
+    // On R6 take advantage of the aui instruction, e.g.:
+    //   aui   AT, base, offset_high
+    //   lw    reg_lo, offset_low(AT)
+    //   lw    reg_hi, (offset_low+4)(AT)
+    // or when offset_low+4 overflows int16_t:
+    //   aui   AT, base, offset_high
+    //   addiu AT, AT, 8
+    //   lw    reg_lo, (offset_low-8)(AT)
+    //   lw    reg_hi, (offset_low-4)(AT)
+    int16_t offset_high = High16Bits(offset);
+    int16_t offset_low = Low16Bits(offset);
+    offset_high += (offset_low < 0) ? 1 : 0;  // Account for offset sign extension in load/store.
+    Aui(AT, base, offset_high);
+    if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low + kMipsWordSize))) {
+      // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4.
+      Addiu(AT, AT, kMipsDoublewordSize);
+      offset_low -= kMipsDoublewordSize;
+    }
+    offset = offset_low;
+  } else {
+    // Do not load the whole 32-bit `offset` if it can be represented as
+    // a sum of three 16-bit signed offsets. This can save an instruction.
+    // To simplify matters, only do this for a symmetric range of offsets from
+    // about -96KB to about +96KB, allowing further addition of 4 when accessing
+    // 64-bit variables with two 32-bit accesses.
+    constexpr int32_t kMinOffsetForMediumAdjustment = 2 * kMinOffsetForSimpleAdjustment;
+    constexpr int32_t kMaxOffsetForMediumAdjustment = 3 * kMinOffsetForSimpleAdjustment;
+    if (0 <= offset && offset <= kMaxOffsetForMediumAdjustment) {
+      Addiu(AT, base, kMinOffsetForMediumAdjustment / 2);
+      Addiu(AT, AT, kMinOffsetForMediumAdjustment / 2);
+      offset -= kMinOffsetForMediumAdjustment;
+    } else if (-kMaxOffsetForMediumAdjustment <= offset && offset < 0) {
+      Addiu(AT, base, -kMinOffsetForMediumAdjustment / 2);
+      Addiu(AT, AT, -kMinOffsetForMediumAdjustment / 2);
+      offset += kMinOffsetForMediumAdjustment;
+    } else {
+      // Now that all shorter options have been exhausted, load the full 32-bit offset.
+      int32_t loaded_offset = RoundDown(offset, kMipsDoublewordSize);
+      LoadConst32(AT, loaded_offset);
+      Addu(AT, AT, base);
+      offset -= loaded_offset;
+    }
+  }
+  base = AT;
+
+  CHECK(IsInt<16>(offset));
+  if (two_accesses) {
+    CHECK(IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)));
+  }
+  CHECK_EQ(misalignment, offset & (kMipsDoublewordSize - 1));
+}
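A worked example of the simple adjustment (offset chosen for illustration): a word load at offset 0x9010 no longer needs the full 32-bit constant; the base/offset pair is rewritten so the residual offset fits the instruction's 16-bit immediate:

    addiu AT, base, 0x7ff8      # largest int16_t that is a multiple of 8
    lw    reg, 0x1018(AT)       # 0x9010 - 0x7ff8 = 0x1018, fits in int16_t

Keeping the adjustment a multiple of 8 preserves the offset's doubleword (mis)alignment, so a second access at offset + 4 still fits as well.
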
+
+void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
+                                   int32_t offset) {
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
   switch (type) {
     case kLoadSignedByte:
       Lb(reg, base, offset);
@@ -2306,27 +2569,12 @@
 }
 
 void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true);
   Lwc1(reg, base, offset);
 }
 
 void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (!IsAligned<kMipsDoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true);
   if (offset & 0x7) {
     if (Is32BitFPU()) {
       Lwc1(reg, base, offset);
@@ -2365,15 +2613,10 @@
 
 void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base,
                                   int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  // Must not use AT as `reg`, so as not to overwrite the value being stored
+  // with the adjusted `base`.
+  CHECK_NE(reg, AT);
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
   switch (type) {
     case kStoreByte:
       Sb(reg, base, offset);
@@ -2396,27 +2639,12 @@
 }
 
 void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true);
   Swc1(reg, base, offset);
 }
 
 void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) {
-  // IsInt<16> must be passed a signed value.
-  if (!IsInt<16>(offset) ||
-      (!IsAligned<kMipsDoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
-    LoadConst32(AT, offset);
-    Addu(AT, AT, base);
-    base = AT;
-    offset = 0;
-  }
-
+  AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true);
   if (offset & 0x7) {
     if (Is32BitFPU()) {
       Swc1(reg, base, offset);
@@ -2438,8 +2666,9 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                               const std::vector<ManagedRegister>& callee_save_regs,
+void MipsAssembler::BuildFrame(size_t frame_size,
+                               ManagedRegister method_reg,
+                               ArrayRef<const ManagedRegister> callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -2453,7 +2682,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2482,7 +2711,7 @@
 }
 
 void MipsAssembler::RemoveFrame(size_t frame_size,
-                                const std::vector<ManagedRegister>& callee_save_regs) {
+                                ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2490,7 +2719,7 @@
   // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
@@ -2570,26 +2799,17 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, uint32_t imm,
+void MipsAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                             FrameOffset fr_offs,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                                               FrameOffset fr_offs,
-                                               ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
   Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) {
+void MipsAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2606,8 +2826,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest,
-                                     ThreadOffset<kMipsWordSize> src, size_t size) {
+void MipsAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2635,8 +2854,7 @@
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
-void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                           ThreadOffset<kMipsWordSize> offs) {
+void MipsAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -2690,9 +2908,9 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                           ThreadOffset<kMipsWordSize> thr_offs,
-                                           ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                         ThreadOffset32 thr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2701,9 +2919,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                                         FrameOffset fr_offs,
-                                         ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                       FrameOffset fr_offs,
+                                       ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2875,8 +3093,8 @@
   // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset<kMipsWordSize> offset ATTRIBUTE_UNUSED,
-                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void MipsAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                   ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
@@ -2893,7 +3111,7 @@
   MipsManagedRegister scratch = mscratch.AsMips();
   exception_blocks_.emplace_back(scratch, stack_adjust);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 S1, Thread::ExceptionOffset<kMipsWordSize>().Int32Value());
+                 S1, Thread::ExceptionOffset<kMipsPointerSize>().Int32Value());
   // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry());
   // as the NAL instruction (occurring in long R2 branches) may become deprecated.
   // For now use common for R2 and R6 instructions as this code must execute on both.
@@ -2911,7 +3129,7 @@
   Move(A0, exception->scratch_.AsCoreRegister());
   // Set up call to Thread::Current()->pDeliverException.
   LoadFromOffset(kLoadWord, T9, S1,
-    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pDeliverException).Int32Value());
+    QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index ecb67bd..41b6c6b 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,16 +17,20 @@
 #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 
+#include <deque>
 #include <utility>
 #include <vector>
 
 #include "arch/mips/instruction_set_features_mips.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -79,6 +83,49 @@
   DISALLOW_COPY_AND_ASSIGN(MipsLabel);
 };
 
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+  static constexpr size_t kMaxSize = 8;
+
+  Literal(uint32_t size, const uint8_t* data)
+      : label_(), size_(size) {
+    DCHECK_LE(size, Literal::kMaxSize);
+    memcpy(data_, data, size);
+  }
+
+  template <typename T>
+  T GetValue() const {
+    DCHECK_EQ(size_, sizeof(T));
+    T value;
+    memcpy(&value, data_, sizeof(T));
+    return value;
+  }
+
+  uint32_t GetSize() const {
+    return size_;
+  }
+
+  const uint8_t* GetData() const {
+    return data_;
+  }
+
+  MipsLabel* GetLabel() {
+    return &label_;
+  }
+
+  const MipsLabel* GetLabel() const {
+    return &label_;
+  }
+
+ private:
+  MipsLabel label_;
+  const uint32_t size_;
+  uint8_t data_[kMaxSize];
+
+  DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
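A minimal sketch of the accessors above, assuming code inside the art namespace (in practice literals are created through MipsAssembler::NewLiteral() and placed with LoadLiteral() rather than constructed directly):

  uint32_t raw = 0x12345678u;
  Literal lit(sizeof(raw), reinterpret_cast<const uint8_t*>(&raw));
  uint32_t value = lit.GetValue<uint32_t>();  // DCHECKs that sizeof(T) matches GetSize().
  MipsLabel* where = lit.GetLabel();          // Bound to the literal's location when it is emitted.
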
 // Slowpath entered when Thread::Current()->exception_ is non-null.
 class MipsExceptionSlowPath {
  public:
@@ -100,13 +147,14 @@
   DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
-class MipsAssembler FINAL : public Assembler {
+class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
   explicit MipsAssembler(ArenaAllocator* arena,
                          const MipsInstructionSetFeatures* instruction_set_features = nullptr)
       : Assembler(arena),
         overwriting_(false),
         overwrite_location_(0),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
@@ -114,6 +162,9 @@
     cfi().DelayEmittingAdvancePCs();
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
@@ -182,7 +233,9 @@
   void Lwr(Register rt, Register rs, uint16_t imm16);
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
+  void Lwpc(Register rs, uint32_t imm19);  // R6
   void Lui(Register rt, uint16_t imm16);
+  void Aui(Register rt, Register rs, uint16_t imm16);  // R6
   void Sync(uint32_t stype);
   void Mfhi(Register rd);  // R2
   void Mflo(Register rd);  // R2
@@ -204,6 +257,7 @@
   void Sltiu(Register rt, Register rs, uint16_t imm16);
 
   void B(uint16_t imm16);
+  void Bal(uint16_t imm16);
   void Beq(Register rs, Register rt, uint16_t imm16);
   void Bne(Register rs, Register rt, uint16_t imm16);
   void Beqz(Register rt, uint16_t imm16);
@@ -225,6 +279,7 @@
   void Auipc(Register rs, uint16_t imm16);  // R6
   void Addiupc(Register rs, uint32_t imm19);  // R6
   void Bc(uint32_t imm26);  // R6
+  void Balc(uint32_t imm26);  // R6
   void Jic(Register rt, uint16_t imm16);  // R6
   void Jialc(Register rt, uint16_t imm16);  // R6
   void Bltc(Register rs, Register rt, uint16_t imm16);  // R6
@@ -364,7 +419,7 @@
   // These will generate R2 branches or R6 branches as appropriate.
   void Bind(MipsLabel* label);
   void B(MipsLabel* label);
-  void Jalr(MipsLabel* label, Register indirect_reg);
+  void Bal(MipsLabel* label);
   void Beq(Register rs, Register rt, MipsLabel* label);
   void Bne(Register rs, Register rt, MipsLabel* label);
   void Beqz(Register rt, MipsLabel* label);
@@ -385,6 +440,10 @@
   void Bc1nez(FRegister ft, MipsLabel* label);  // R6
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
+  void AdjustBaseAndOffset(Register& base,
+                           int32_t& offset,
+                           bool is_doubleword,
+                           bool is_float = false);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
   void LoadSFromOffset(FRegister reg, Register base, int32_t offset);
   void LoadDFromOffset(FRegister reg, Register base, int32_t offset);
@@ -407,6 +466,21 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
+  // Create a new literal with a given value.
+  // NOTE: Force the template parameter to be explicitly specified.
+  template <typename T>
+  Literal* NewLiteral(typename Identity<T>::type value) {
+    static_assert(std::is_integral<T>::value, "T must be an integral type.");
+    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+  }
+
+  // Create a new literal with the given data.
+  Literal* NewLiteral(size_t size, const uint8_t* data);
+
+  // Load literal using the base register (for R2 only) or using PC-relative loads
+  // (for R6 only; base_reg must be ZERO).
+  void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
+
   //
   // Overridden common assembler high-level functionality.
   //
@@ -414,11 +488,11 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
@@ -431,15 +505,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest,
-                                uint32_t imm,
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
                                 ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                                  FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest,
                      ManagedRegister msrc,
@@ -449,9 +519,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest,
-                        ThreadOffset<kMipsWordSize> src,
-                        size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -462,19 +530,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<kMipsWordSize> offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs,
-                              ThreadOffset<kMipsWordSize> thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
-                            FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -550,7 +618,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<kMipsWordSize> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
@@ -564,12 +632,25 @@
 
   // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS,
   // must be used instead of MipsLabel::GetPosition()).
-  uint32_t GetLabelLocation(MipsLabel* label) const;
+  uint32_t GetLabelLocation(const MipsLabel* label) const;
 
   // Get the final position of a label after local fixup based on the old position
   // recorded before FinalizeCode().
   uint32_t GetAdjustedPosition(uint32_t old_position);
 
+  // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by
+  // reading the PC value into a general-purpose register with the NAL instruction and then loading
+  // literals through this base register. The code generator calls this method (at most once per
+  // method being compiled) to bind a label to the location for which the PC value is acquired.
+  // The assembler then computes literal offsets relative to this label.
+  void BindPcRelBaseLabel();
+
+  // Returns the location of the label bound with BindPcRelBaseLabel().
+  uint32_t GetPcRelBaseLabelLocation() const;
+
+  // Note that PC-relative literal loads are handled as pseudo branches because they need very
+  // similar relocation and may similarly expand in size to accommodate larger offsets relative
+  // to PC.
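A minimal usage sketch in the style of the new tests added below (`__` stands for the assembler; `base_reg` is a hypothetical register holding the PC value acquired via NAL):

  mips::Literal* lit = __ NewLiteral<uint32_t>(0x12345678u);
  __ LoadLiteral(mips::V0, mips::ZERO, lit);  // R6: expands to LWPC or AUIPC+LW.
  // R2 (sketch): after acquiring PC with NAL and calling BindPcRelBaseLabel() once per
  // compiled method, literals are instead loaded through that base register:
  // __ LoadLiteral(mips::V0, base_reg, lit);
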
   enum BranchCondition {
     kCondLT,
     kCondGE,
@@ -599,18 +680,26 @@
       kUncondBranch,
       kCondBranch,
       kCall,
+      // R2 near literal.
+      kLiteral,
       // R2 long branches.
       kLongUncondBranch,
       kLongCondBranch,
       kLongCall,
+      // R2 far literal.
+      kFarLiteral,
       // R6 short branches.
       kR6UncondBranch,
       kR6CondBranch,
       kR6Call,
+      // R6 near literal.
+      kR6Literal,
       // R6 long branches.
       kR6LongUncondBranch,
       kR6LongCondBranch,
       kR6LongCall,
+      // R6 far literal.
+      kR6FarLiteral,
     };
     // Bit sizes of offsets defined as enums to minimize chance of typos.
     enum OffsetBits {
@@ -645,17 +734,17 @@
     };
     static const BranchInfo branch_info_[/* Type */];
 
-    // Unconditional branch.
-    Branch(bool is_r6, uint32_t location, uint32_t target);
+    // Unconditional branch or call.
+    Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
     // Conditional branch.
     Branch(bool is_r6,
            uint32_t location,
            uint32_t target,
            BranchCondition condition,
            Register lhs_reg,
-           Register rhs_reg = ZERO);
-    // Call (branch and link) that stores the target address in a given register (i.e. T9).
-    Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+           Register rhs_reg);
+    // Literal.
+    Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg);
 
     // Some conditional branches with lhs = rhs are effectively NOPs, while some
     // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
@@ -731,17 +820,18 @@
     // that is allowed for short branches. This is for debugging/testing purposes.
     // max_short_distance = 0 forces all short branches to become long.
     // Use the implicit default argument when not debugging/testing.
-    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+    uint32_t PromoteIfNeeded(uint32_t location,
+                             uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
 
     // Returns the location of the instruction(s) containing the offset.
     uint32_t GetOffsetLocation() const;
 
     // Calculates and returns the offset ready for encoding in the branch instruction(s).
-    uint32_t GetOffset() const;
+    uint32_t GetOffset(uint32_t location) const;
 
    private:
     // Completes branch construction by determining and recording its type.
-    void InitializeType(bool is_call, bool is_r6);
+    void InitializeType(bool is_call, bool is_literal, bool is_r6);
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
@@ -771,12 +861,15 @@
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
-  void Call(MipsLabel* label, Register indirect_reg);
+  void Call(MipsLabel* label);
   void FinalizeLabeledBranch(MipsLabel* label);
 
   Branch* GetBranch(uint32_t branch_id);
   const Branch* GetBranch(uint32_t branch_id) const;
+  uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
+  uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
 
+  void EmitLiterals();
   void PromoteBranches();
   void EmitBranch(Branch* branch);
   void EmitBranches();
@@ -811,6 +904,15 @@
   // The current overwrite location.
   uint32_t overwrite_location_;
 
+  // Use std::deque<> so that literals can be appended at the end without
+  // invalidating pointers and references to existing elements (and the labels they contain).
+  ArenaDeque<Literal> literals_;
+
+  // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC
+  // we get PC using the NAL instruction. This label marks the position within the assembler buffer
+  // that PC (from NAL) points to.
+  MipsLabel pc_rel_base_label_;
+
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
   uint32_t last_old_position_;
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
new file mode 100644
index 0000000..49ef272
--- /dev/null
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -0,0 +1,714 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips.h"
+
+#include <map>
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+#define __ GetAssembler()->
+
+namespace art {
+
+struct MIPSCpuRegisterCompare {
+  bool operator()(const mips::Register& a, const mips::Register& b) const {
+    return a < b;
+  }
+};
+
+class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
+                                                   mips::Register,
+                                                   mips::FRegister,
+                                                   uint32_t> {
+ public:
+  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+
+  AssemblerMIPS32r6Test() :
+    instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) {
+  }
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "mips";
+  }
+
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6
+    // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
+    // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
+    // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
+    // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
+    return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+        " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mmips:isa32r6";
+  }
+
+  mips::MipsAssembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE {
+    return new (arena) mips::MipsAssembler(arena, instruction_set_features_.get());
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.push_back(new mips::Register(mips::ZERO));
+      registers_.push_back(new mips::Register(mips::AT));
+      registers_.push_back(new mips::Register(mips::V0));
+      registers_.push_back(new mips::Register(mips::V1));
+      registers_.push_back(new mips::Register(mips::A0));
+      registers_.push_back(new mips::Register(mips::A1));
+      registers_.push_back(new mips::Register(mips::A2));
+      registers_.push_back(new mips::Register(mips::A3));
+      registers_.push_back(new mips::Register(mips::T0));
+      registers_.push_back(new mips::Register(mips::T1));
+      registers_.push_back(new mips::Register(mips::T2));
+      registers_.push_back(new mips::Register(mips::T3));
+      registers_.push_back(new mips::Register(mips::T4));
+      registers_.push_back(new mips::Register(mips::T5));
+      registers_.push_back(new mips::Register(mips::T6));
+      registers_.push_back(new mips::Register(mips::T7));
+      registers_.push_back(new mips::Register(mips::S0));
+      registers_.push_back(new mips::Register(mips::S1));
+      registers_.push_back(new mips::Register(mips::S2));
+      registers_.push_back(new mips::Register(mips::S3));
+      registers_.push_back(new mips::Register(mips::S4));
+      registers_.push_back(new mips::Register(mips::S5));
+      registers_.push_back(new mips::Register(mips::S6));
+      registers_.push_back(new mips::Register(mips::S7));
+      registers_.push_back(new mips::Register(mips::T8));
+      registers_.push_back(new mips::Register(mips::T9));
+      registers_.push_back(new mips::Register(mips::K0));
+      registers_.push_back(new mips::Register(mips::K1));
+      registers_.push_back(new mips::Register(mips::GP));
+      registers_.push_back(new mips::Register(mips::SP));
+      registers_.push_back(new mips::Register(mips::FP));
+      registers_.push_back(new mips::Register(mips::RA));
+
+      secondary_register_names_.emplace(mips::Register(mips::ZERO), "zero");
+      secondary_register_names_.emplace(mips::Register(mips::AT), "at");
+      secondary_register_names_.emplace(mips::Register(mips::V0), "v0");
+      secondary_register_names_.emplace(mips::Register(mips::V1), "v1");
+      secondary_register_names_.emplace(mips::Register(mips::A0), "a0");
+      secondary_register_names_.emplace(mips::Register(mips::A1), "a1");
+      secondary_register_names_.emplace(mips::Register(mips::A2), "a2");
+      secondary_register_names_.emplace(mips::Register(mips::A3), "a3");
+      secondary_register_names_.emplace(mips::Register(mips::T0), "t0");
+      secondary_register_names_.emplace(mips::Register(mips::T1), "t1");
+      secondary_register_names_.emplace(mips::Register(mips::T2), "t2");
+      secondary_register_names_.emplace(mips::Register(mips::T3), "t3");
+      secondary_register_names_.emplace(mips::Register(mips::T4), "t4");
+      secondary_register_names_.emplace(mips::Register(mips::T5), "t5");
+      secondary_register_names_.emplace(mips::Register(mips::T6), "t6");
+      secondary_register_names_.emplace(mips::Register(mips::T7), "t7");
+      secondary_register_names_.emplace(mips::Register(mips::S0), "s0");
+      secondary_register_names_.emplace(mips::Register(mips::S1), "s1");
+      secondary_register_names_.emplace(mips::Register(mips::S2), "s2");
+      secondary_register_names_.emplace(mips::Register(mips::S3), "s3");
+      secondary_register_names_.emplace(mips::Register(mips::S4), "s4");
+      secondary_register_names_.emplace(mips::Register(mips::S5), "s5");
+      secondary_register_names_.emplace(mips::Register(mips::S6), "s6");
+      secondary_register_names_.emplace(mips::Register(mips::S7), "s7");
+      secondary_register_names_.emplace(mips::Register(mips::T8), "t8");
+      secondary_register_names_.emplace(mips::Register(mips::T9), "t9");
+      secondary_register_names_.emplace(mips::Register(mips::K0), "k0");
+      secondary_register_names_.emplace(mips::Register(mips::K1), "k1");
+      secondary_register_names_.emplace(mips::Register(mips::GP), "gp");
+      secondary_register_names_.emplace(mips::Register(mips::SP), "sp");
+      secondary_register_names_.emplace(mips::Register(mips::FP), "fp");
+      secondary_register_names_.emplace(mips::Register(mips::RA), "ra");
+
+      fp_registers_.push_back(new mips::FRegister(mips::F0));
+      fp_registers_.push_back(new mips::FRegister(mips::F1));
+      fp_registers_.push_back(new mips::FRegister(mips::F2));
+      fp_registers_.push_back(new mips::FRegister(mips::F3));
+      fp_registers_.push_back(new mips::FRegister(mips::F4));
+      fp_registers_.push_back(new mips::FRegister(mips::F5));
+      fp_registers_.push_back(new mips::FRegister(mips::F6));
+      fp_registers_.push_back(new mips::FRegister(mips::F7));
+      fp_registers_.push_back(new mips::FRegister(mips::F8));
+      fp_registers_.push_back(new mips::FRegister(mips::F9));
+      fp_registers_.push_back(new mips::FRegister(mips::F10));
+      fp_registers_.push_back(new mips::FRegister(mips::F11));
+      fp_registers_.push_back(new mips::FRegister(mips::F12));
+      fp_registers_.push_back(new mips::FRegister(mips::F13));
+      fp_registers_.push_back(new mips::FRegister(mips::F14));
+      fp_registers_.push_back(new mips::FRegister(mips::F15));
+      fp_registers_.push_back(new mips::FRegister(mips::F16));
+      fp_registers_.push_back(new mips::FRegister(mips::F17));
+      fp_registers_.push_back(new mips::FRegister(mips::F18));
+      fp_registers_.push_back(new mips::FRegister(mips::F19));
+      fp_registers_.push_back(new mips::FRegister(mips::F20));
+      fp_registers_.push_back(new mips::FRegister(mips::F21));
+      fp_registers_.push_back(new mips::FRegister(mips::F22));
+      fp_registers_.push_back(new mips::FRegister(mips::F23));
+      fp_registers_.push_back(new mips::FRegister(mips::F24));
+      fp_registers_.push_back(new mips::FRegister(mips::F25));
+      fp_registers_.push_back(new mips::FRegister(mips::F26));
+      fp_registers_.push_back(new mips::FRegister(mips::F27));
+      fp_registers_.push_back(new mips::FRegister(mips::F28));
+      fp_registers_.push_back(new mips::FRegister(mips::F29));
+      fp_registers_.push_back(new mips::FRegister(mips::F30));
+      fp_registers_.push_back(new mips::FRegister(mips::F31));
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<mips::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+  std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*),
+                               std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+ private:
+  std::vector<mips::Register*> registers_;
+  std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
+
+  std::vector<mips::FRegister*> fp_registers_;
+  std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_;
+};
+
+
+TEST_F(AssemblerMIPS32r6Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MulR6, "mul ${reg1}, ${reg2}, ${reg3}"), "MulR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MuhR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MuhR6, "muh ${reg1}, ${reg2}, ${reg3}"), "MuhR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MuhuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MuhuR6, "muhu ${reg1}, ${reg2}, ${reg3}"), "MuhuR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, DivR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivR6, "div ${reg1}, ${reg2}, ${reg3}"), "DivR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ModR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModR6, "mod ${reg1}, ${reg2}, ${reg3}"), "ModR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, DivuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivuR6, "divu ${reg1}, ${reg2}, ${reg3}"), "DivuR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ModuR6) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModuR6, "modu ${reg1}, ${reg2}, ${reg3}"), "ModuR6");
+}
+
+//////////
+// MISC //
+//////////
+
+TEST_F(AssemblerMIPS32r6Test, Aui) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Auipc) {
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Lwpc) {
+  // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset,
+  // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`.
+  // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right
+  // by 2 positions when encoding, hence `<< 2` to compensate for that shift.
+  // We capture the value of the immediate with `.set imm, {imm}` because the value is needed
+  // twice for the sign extension, but `{imm}` is substituted only once.
+  const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)";
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc");
+}
+
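A quick check of the sign-extension expression with two boundary values:

  // imm = 0x40000 (bit 18 set):   0x40000 - ((0x40000 & 0x40000) << 1) = 0x40000 - 0x80000 = -0x40000
  // imm = 0x3FFFF (bit 18 clear): the subtrahend is 0, so the offset stays +0x3FFFF
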
+TEST_F(AssemblerMIPS32r6Test, Bitswap) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Seleqz) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
+            "seleqz");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Selnez) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"),
+            "selnez");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClzR6) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::ClzR6, "clz ${reg1}, ${reg2}"), "clzR6");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CloR6) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::CloR6, "clo ${reg1}, ${reg2}"), "cloR6");
+}
+
+////////////////////
+// FLOATING POINT //
+////////////////////
+
+TEST_F(AssemblerMIPS32r6Test, SelS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelS, "sel.s ${reg1}, ${reg2}, ${reg3}"), "sel.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SelD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClassS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, ClassD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::ClassD, "class.d ${reg1}, ${reg2}"), "class.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MinS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MinS, "min.s ${reg1}, ${reg2}, ${reg3}"), "min.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MinD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MinD, "min.d ${reg1}, ${reg2}, ${reg3}"), "min.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MaxS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MaxS, "max.s ${reg1}, ${reg2}, ${reg3}"), "max.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MaxD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUnS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpEqS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUeqS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLtS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUltS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLeS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUleS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpOrS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUneS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpNeS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUnD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpEqD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUeqD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLtD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUltD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpLeD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUleD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpOrD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpUneD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, CmpNeD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadDFromOffset) {
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8008);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8001);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678);
+
+  const char* expected =
+      "ldc1 $f0, -0x8000($a0)\n"
+      "ldc1 $f0, 0($a0)\n"
+      "ldc1 $f0, 0x7FF8($a0)\n"
+      "lwc1 $f0, 0x7FFB($a0)\n"
+      "lw $t8, 0x7FFF($a0)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t8, 8($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 7($at)\n"
+      "lw $t8, 11($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f0, -9($at)\n"
+      "lw $t8, -5($at)\n"
+      "mthc1 $t8, $f0\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, -0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, 0x8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "lwc1 $f0, 0xF($at)\n"
+      "lw $t8, 0x13($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "lwc1 $f0, -0xF($at)\n"
+      "lw $t8, -0xB($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, -0x8($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, 0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "ldc1 $f0, -0x7FF0($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "lwc1 $f0, -0x7FE9($at)\n"
+      "lw $t8, -0x7FE5($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "lwc1 $f0, 0x7FE9($at)\n"
+      "lw $t8, 0x7FED($at)\n"
+      "mthc1 $t8, $f0\n"
+      "aui $at, $a0, 0x1\n"
+      "ldc1 $f0, 0x7FF0($at)\n"
+      "aui $at, $a0, 0x1234\n"
+      "ldc1 $f0, 0x5678($at)\n";
+  DriverStr(expected, "LoadDFromOffset");
+}
+
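The addiu $at folding in the expected strings above corresponds to the new MipsAssembler::AdjustBaseAndOffset() helper declared in assembler_mips.h. A minimal sketch of the equivalent call for the +0x8000 case, with hypothetical locals:

  mips::Register base = mips::A0;
  int32_t offset = 0x8000;
  __ AdjustBaseAndOffset(base, offset, /* is_doubleword */ true);
  // base is now AT (materialized as "addiu $at, $a0, 0x7FF8") and offset is 8,
  // which fits the 16-bit immediate of LDC1/SDC1.
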
+TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) {
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF);
+  __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8008);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8001);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x12345678);
+
+  const char* expected =
+      "sdc1 $f0, -0x8000($a0)\n"
+      "sdc1 $f0, 0($a0)\n"
+      "sdc1 $f0, 0x7FF8($a0)\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0x7FFB($a0)\n"
+      "sw $t8, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t8, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 7($at)\n"
+      "sw $t8, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -9($at)\n"
+      "sw $t8, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, -0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, 0x8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0xF($at)\n"
+      "sw $t8, 0x13($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -0xF($at)\n"
+      "sw $t8, -0xB($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, -0x8($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, 0x7FE8($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "sdc1 $f0, -0x7FF0($at)\n"
+      "aui $at, $a0, 0xFFFF\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, -0x7FE9($at)\n"
+      "sw $t8, -0x7FE5($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "mfhc1 $t8, $f0\n"
+      "swc1 $f0, 0x7FE9($at)\n"
+      "sw $t8, 0x7FED($at)\n"
+      "aui $at, $a0, 0x1\n"
+      "sdc1 $f0, 0x7FF0($at)\n"
+      "aui $at, $a0, 0x1234\n"
+      "sdc1 $f0, 0x5678($at)\n";
+  DriverStr(expected, "StoreDToOffset");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDE;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "lwpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
+//////////////
+// BRANCHES //
+//////////////
+
+// TODO: MipsAssembler::Addiupc
+//       MipsAssembler::Bc
+//       MipsAssembler::Jic
+//       MipsAssembler::Jialc
+//       MipsAssembler::Bltc
+//       MipsAssembler::Bltzc
+//       MipsAssembler::Bgtzc
+//       MipsAssembler::Bgec
+//       MipsAssembler::Bgezc
+//       MipsAssembler::Blezc
+//       MipsAssembler::Bltuc
+//       MipsAssembler::Bgeuc
+//       MipsAssembler::Beqc
+//       MipsAssembler::Bnec
+//       MipsAssembler::Beqzc
+//       MipsAssembler::Bnezc
+//       MipsAssembler::Bc1eqz
+//       MipsAssembler::Bc1nez
+//       MipsAssembler::Buncond
+//       MipsAssembler::Bcond
+//       MipsAssembler::Call
+
+// TODO:  AssemblerMIPS32r6Test.B
+//        AssemblerMIPS32r6Test.Beq
+//        AssemblerMIPS32r6Test.Bne
+//        AssemblerMIPS32r6Test.Beqz
+//        AssemblerMIPS32r6Test.Bnez
+//        AssemblerMIPS32r6Test.Bltz
+//        AssemblerMIPS32r6Test.Bgez
+//        AssemblerMIPS32r6Test.Blez
+//        AssemblerMIPS32r6Test.Bgtz
+//        AssemblerMIPS32r6Test.Blt
+//        AssemblerMIPS32r6Test.Bge
+//        AssemblerMIPS32r6Test.Bltu
+//        AssemblerMIPS32r6Test.Bgeu
+
+#undef __
+
+}  // namespace art
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index cec43ba..50a8dc2 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -561,6 +561,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, FloorWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s");
+}
+
+TEST_F(AssemblerMIPSTest, FloorWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d");
+}
+
 TEST_F(AssemblerMIPSTest, CunS) {
   DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
             "CunS");
@@ -639,6 +647,26 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt");
 }
 
+TEST_F(AssemblerMIPSTest, MovfS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfS, 3, "movf.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfS");
+}
+
+TEST_F(AssemblerMIPSTest, MovfD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfD, 3, "movf.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfD");
+}
+
+TEST_F(AssemblerMIPSTest, MovtS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtS, 3, "movt.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtS");
+}
+
+TEST_F(AssemblerMIPSTest, MovtD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtD, 3, "movt.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtD");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
@@ -723,212 +751,538 @@
   DriverStr(RepeatRR(&mips::MipsAssembler::Not, "nor ${reg1}, ${reg2}, $zero"), "Not");
 }
 
-TEST_F(AssemblerMIPSTest, LoadFromOffset) {
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 256);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 1000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x8000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x10000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x12345678);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, -256);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xABCDEF00);
-
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A1, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A1, mips::A0, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 256);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 1000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x8000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x10000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x12345678);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -256);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xFFFF8000);
-  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+TEST_F(AssemblerMIPSTest, Addiu32) {
+  __ Addiu32(mips::A1, mips::A2, -0x8000);
+  __ Addiu32(mips::A1, mips::A2, +0);
+  __ Addiu32(mips::A1, mips::A2, +0x7FFF);
+  __ Addiu32(mips::A1, mips::A2, -0x10000);
+  __ Addiu32(mips::A1, mips::A2, -0x8001);
+  __ Addiu32(mips::A1, mips::A2, +0x8000);
+  __ Addiu32(mips::A1, mips::A2, +0xFFFE);
+  __ Addiu32(mips::A1, mips::A2, -0x10001);
+  __ Addiu32(mips::A1, mips::A2, +0xFFFF);
+  __ Addiu32(mips::A1, mips::A2, +0x10000);
+  __ Addiu32(mips::A1, mips::A2, +0x10001);
+  __ Addiu32(mips::A1, mips::A2, +0x12345678);
 
   const char* expected =
-      "lb $a0, 0($a0)\n"
-      "lb $a0, 0($a1)\n"
-      "lb $a0, 256($a1)\n"
-      "lb $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
+      "addiu $a1, $a2, -0x8000\n"
+      "addiu $a1, $a2, 0\n"
+      "addiu $a1, $a2, 0x7FFF\n"
+      "addiu $at, $a2, -0x8000\n"
+      "addiu $a1, $at, -0x8000\n"
+      "addiu $at, $a2, -0x8000\n"
+      "addiu $a1, $at, -1\n"
+      "addiu $at, $a2, 0x7FFF\n"
+      "addiu $a1, $at, 1\n"
+      "addiu $at, $a2, 0x7FFF\n"
+      "addiu $a1, $at, 0x7FFF\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0xFFFF\n"
+      "addu $a1, $a2, $at\n"
+      "ori $at, $zero, 0xFFFF\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 1\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-      "lb $a0, -256($a1)\n"
-      "lb $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-
-      "lbu $a0, 0($a0)\n"
-      "lbu $a0, 0($a1)\n"
-      "lbu $a0, 256($a1)\n"
-      "lbu $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 1\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "ori $at, $at, 1\n"
+      "addu $a1, $a2, $at\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
-      "lbu $a0, -256($a1)\n"
-      "lbu $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $a1, $a2, $at\n";
+  DriverStr(expected, "Addiu32");
+}
 
-      "lh $a0, 0($a0)\n"
-      "lh $a0, 0($a1)\n"
-      "lh $a0, 256($a1)\n"
-      "lh $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+TEST_F(AssemblerMIPSTest, LoadFromOffset) {
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8008);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8001);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x12345678);
+
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFB);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFC);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFF);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0xFFF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8008);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8001);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0xFFF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF1);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF1);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE8);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE9);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE9);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FF0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x12345678);
+
+  const char* expected =
+      "lb $a3, -0x8000($a1)\n"
+      "lb $a3, 0($a1)\n"
+      "lb $a3, 0x7FF8($a1)\n"
+      "lb $a3, 0x7FFB($a1)\n"
+      "lb $a3, 0x7FFC($a1)\n"
+      "lb $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lb $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lb $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lb $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lb $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lb $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
+      "lb $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lb $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lb $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
-      "lh $a0, -256($a1)\n"
-      "lh $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
+      "lb $a3, 0($at)\n"
 
-      "lhu $a0, 0($a0)\n"
-      "lhu $a0, 0($a1)\n"
-      "lhu $a0, 256($a1)\n"
-      "lhu $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lbu $a3, -0x8000($a1)\n"
+      "lbu $a3, 0($a1)\n"
+      "lbu $a3, 0x7FF8($a1)\n"
+      "lbu $a3, 0x7FFB($a1)\n"
+      "lbu $a3, 0x7FFC($a1)\n"
+      "lbu $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lbu $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lbu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lbu $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lbu $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lbu $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lbu $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
+      "lbu $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
-      "lhu $a0, -256($a1)\n"
-      "lhu $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
+      "lbu $a3, 0($at)\n"
 
-      "lw $a0, 0($a0)\n"
-      "lw $a0, 0($a1)\n"
-      "lw $a0, 256($a1)\n"
-      "lw $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lh $a3, -0x8000($a1)\n"
+      "lh $a3, 0($a1)\n"
+      "lh $a3, 0x7FF8($a1)\n"
+      "lh $a3, 0x7FFB($a1)\n"
+      "lh $a3, 0x7FFC($a1)\n"
+      "lh $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lh $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lh $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lh $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lh $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "lh $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
+      "lh $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lh $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lh $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
-      "lw $a0, -256($a1)\n"
-      "lw $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
+      "lh $a3, 0($at)\n"
 
-      "lw $a1, 4($a0)\n"
-      "lw $a0, 0($a0)\n"
-      "lw $a0, 0($a1)\n"
-      "lw $a1, 4($a1)\n"
-      "lw $a1, 0($a0)\n"
-      "lw $a2, 4($a0)\n"
+      "lhu $a3, -0x8000($a1)\n"
+      "lhu $a3, 0($a1)\n"
+      "lhu $a3, 0x7FF8($a1)\n"
+      "lhu $a3, 0x7FFB($a1)\n"
+      "lhu $a3, 0x7FFC($a1)\n"
+      "lhu $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lhu $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lhu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lhu $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lhu $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lhu $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a3, 0($at)\n"
+
+      "lw $a3, -0x8000($a1)\n"
+      "lw $a3, 0($a1)\n"
+      "lw $a3, 0x7FF8($a1)\n"
+      "lw $a3, 0x7FFB($a1)\n"
+      "lw $a3, 0x7FFC($a1)\n"
+      "lw $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "lw $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "lw $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lw $a3, 0($at)\n"
+
+      "lw $a0, -0x8000($a2)\n"
+      "lw $a1, -0x7FFC($a2)\n"
       "lw $a0, 0($a2)\n"
       "lw $a1, 4($a2)\n"
-      "lw $a0, 256($a2)\n"
-      "lw $a1, 260($a2)\n"
-      "lw $a0, 1000($a2)\n"
-      "lw $a1, 1004($a2)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lw $a0, 0x7FF8($a2)\n"
+      "lw $a1, 0x7FFC($a2)\n"
+      "lw $a0, 0x7FFB($a2)\n"
+      "lw $a1, 0x7FFF($a2)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 4($at)\n"
+      "lw $a1, 8($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 7($at)\n"
+      "lw $a1, 11($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -0x7FF8($at)\n"
+      "lw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -0x10($at)\n"
+      "lw $a1, -0xC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "lw $a0, -9($at)\n"
+      "lw $a1, -5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 8($at)\n"
+      "lw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "lw $a0, 0x7FF8($at)\n"
+      "lw $a1, 0x7FFC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -0x7FF8($at)\n"
+      "lw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -8($at)\n"
+      "lw $a1, -4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lw $a0, -1($at)\n"
+      "lw $a1, 3($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 1($at)\n"
+      "lw $a1, 5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 8($at)\n"
+      "lw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lw $a0, 0x7FF8($at)\n"
+      "lw $a1, 0x7FFC($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 7($at)\n"
+      "lw $a1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 1($at)\n"
+      "lw $a1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a2\n"
-      "lw $a0, 0($at)\n"
-      "lw $a1, 4($at)\n"
-      "lw $a0, -256($a2)\n"
-      "lw $a1, -252($a2)\n"
-      "lw $a0, 0xFFFF8000($a2)\n"
-      "lw $a1, 0xFFFF8004($a2)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a2\n"
       "lw $a0, 0($at)\n"
       "lw $a1, 4($at)\n";
@@ -936,208 +1290,513 @@
 }
 
 TEST_F(AssemblerMIPSTest, LoadSFromOffset) {
-  __ LoadSFromOffset(mips::F0, mips::A0, 0);
-  __ LoadSFromOffset(mips::F0, mips::A0, 4);
-  __ LoadSFromOffset(mips::F0, mips::A0, 256);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x8000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x10000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0x12345678);
-  __ LoadSFromOffset(mips::F0, mips::A0, -256);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ LoadSFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8000);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFB);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFC);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFF);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0xFFF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8008);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x8001);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x8000);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0xFFF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF1);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF1);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF8);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE8);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE9);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE9);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x17FF0);
+  __ LoadSFromOffset(mips::F2, mips::A0, +0x12345678);
 
   const char* expected =
-      "lwc1 $f0, 0($a0)\n"
-      "lwc1 $f0, 4($a0)\n"
-      "lwc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "lwc1 $f2, -0x8000($a0)\n"
+      "lwc1 $f2, 0($a0)\n"
+      "lwc1 $f2, 0x7FF8($a0)\n"
+      "lwc1 $f2, 0x7FFB($a0)\n"
+      "lwc1 $f2, 0x7FFC($a0)\n"
+      "lwc1 $f2, 0x7FFF($a0)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f2, -9($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f2, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f2, -1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f2, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lwc1 $f2, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f2, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f2, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f2, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
-      "lwc1 $f0, -256($a0)\n"
-      "lwc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n";
+      "lwc1 $f2, 0($at)\n";
   DriverStr(expected, "LoadSFromOffset");
 }
 
-
 TEST_F(AssemblerMIPSTest, LoadDFromOffset) {
-  __ LoadDFromOffset(mips::F0, mips::A0, 0);
-  __ LoadDFromOffset(mips::F0, mips::A0, 4);
-  __ LoadDFromOffset(mips::F0, mips::A0, 256);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x8000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x10000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0x12345678);
-  __ LoadDFromOffset(mips::F0, mips::A0, -256);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ LoadDFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8008);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x8001);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0xFFF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0);
+  __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678);
 
   const char* expected =
+      "ldc1 $f0, -0x8000($a0)\n"
       "ldc1 $f0, 0($a0)\n"
-      "lwc1 $f0, 4($a0)\n"
-      "lwc1 $f1, 8($a0)\n"
-      "ldc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "ldc1 $f0, 0x7FF8($a0)\n"
+      "lwc1 $f0, 0x7FFB($a0)\n"
+      "lwc1 $f1, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 4($at)\n"
+      "lwc1 $f1, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "lwc1 $f0, 7($at)\n"
+      "lwc1 $f1, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "ldc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "lwc1 $f0, -9($at)\n"
+      "lwc1 $f1, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "ldc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "ldc1 $f0, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "lwc1 $f0, -1($at)\n"
+      "lwc1 $f1, 3($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "lwc1 $f0, 1($at)\n"
+      "lwc1 $f1, 5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "ldc1 $f0, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 7($at)\n"
+      "lwc1 $f1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 1($at)\n"
+      "lwc1 $f1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a0\n"
-      "ldc1 $f0, 0($at)\n"
-      "ldc1 $f0, -256($a0)\n"
-      "ldc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
       "ldc1 $f0, 0($at)\n";
   DriverStr(expected, "LoadDFromOffset");
 }
 
 TEST_F(AssemblerMIPSTest, StoreToOffset) {
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A0, 0);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 256);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 1000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x8000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x10000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x12345678);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, -256);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFB);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFC);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFF);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0xFFF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8008);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8001);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0xFFF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE8);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE9);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE9);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FF0);
+  __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x12345678);
 
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 256);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 1000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x8000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x10000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x12345678);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -256);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xFFFF8000);
-  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFB);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFC);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFF);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0xFFF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8008);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8001);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0xFFF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF1);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF1);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE8);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE9);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE9);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FF0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x12345678);
 
   const char* expected =
-      "sb $a0, 0($a0)\n"
-      "sb $a0, 0($a1)\n"
-      "sb $a0, 256($a1)\n"
-      "sb $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sb $a3, -0x8000($a1)\n"
+      "sb $a3, 0($a1)\n"
+      "sb $a3, 0x7FF8($a1)\n"
+      "sb $a3, 0x7FFB($a1)\n"
+      "sb $a3, 0x7FFC($a1)\n"
+      "sb $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sb $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sb $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sb $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sb $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sb $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "sb $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sb $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sb $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
-      "sb $a0, -256($a1)\n"
-      "sb $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "sb $a3, 0($at)\n"
 
-      "sh $a0, 0($a0)\n"
-      "sh $a0, 0($a1)\n"
-      "sh $a0, 256($a1)\n"
-      "sh $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sh $a3, -0x8000($a1)\n"
+      "sh $a3, 0($a1)\n"
+      "sh $a3, 0x7FF8($a1)\n"
+      "sh $a3, 0x7FFB($a1)\n"
+      "sh $a3, 0x7FFC($a1)\n"
+      "sh $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sh $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sh $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sh $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sh $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sh $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "sh $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sh $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sh $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
-      "sh $a0, -256($a1)\n"
-      "sh $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "sh $a3, 0($at)\n"
 
-      "sw $a0, 0($a0)\n"
-      "sw $a0, 0($a1)\n"
-      "sw $a0, 256($a1)\n"
-      "sw $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sw $a3, -0x8000($a1)\n"
+      "sw $a3, 0($a1)\n"
+      "sw $a3, 0x7FF8($a1)\n"
+      "sw $a3, 0x7FFB($a1)\n"
+      "sw $a3, 0x7FFC($a1)\n"
+      "sw $a3, 0x7FFF($a1)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -0x10($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "sw $a3, -9($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "sw $a3, 0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -0x7FF8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -8($at)\n"
+      "addiu $at, $a1, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a3, -1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 1($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 8($at)\n"
+      "addiu $at, $a1, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a3, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
-      "lui $at, 1\n"
+      "sw $a3, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "sw $a3, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a1\n"
+      "sw $a3, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a1\n"
+      "sw $a3, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
-      "sw $a0, -256($a1)\n"
-      "sw $a0, 0xFFFF8000($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "sw $a3, 0($at)\n"
 
+      "sw $a0, -0x8000($a2)\n"
+      "sw $a1, -0x7FFC($a2)\n"
       "sw $a0, 0($a2)\n"
       "sw $a1, 4($a2)\n"
-      "sw $a0, 256($a2)\n"
-      "sw $a1, 260($a2)\n"
-      "sw $a0, 1000($a2)\n"
-      "sw $a1, 1004($a2)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sw $a0, 0x7FF8($a2)\n"
+      "sw $a1, 0x7FFC($a2)\n"
+      "sw $a0, 0x7FFB($a2)\n"
+      "sw $a1, 0x7FFF($a2)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 4($at)\n"
+      "sw $a1, 8($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 7($at)\n"
+      "sw $a1, 11($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -0x7FF8($at)\n"
+      "sw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -0x10($at)\n"
+      "sw $a1, -0xC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "sw $a0, -9($at)\n"
+      "sw $a1, -5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 8($at)\n"
+      "sw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "sw $a0, 0x7FF8($at)\n"
+      "sw $a1, 0x7FFC($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -0x7FF8($at)\n"
+      "sw $a1, -0x7FF4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -8($at)\n"
+      "sw $a1, -4($at)\n"
+      "addiu $at, $a2, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sw $a0, -1($at)\n"
+      "sw $a1, 3($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 1($at)\n"
+      "sw $a1, 5($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 8($at)\n"
+      "sw $a1, 12($at)\n"
+      "addiu $at, $a2, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sw $a0, 0x7FF8($at)\n"
+      "sw $a1, 0x7FFC($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 7($at)\n"
+      "sw $a1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 1($at)\n"
+      "sw $a1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a2\n"
-      "sw $a0, 0($at)\n"
-      "sw $a1, 4($at)\n"
-      "sw $a0, -256($a2)\n"
-      "sw $a1, -252($a2)\n"
-      "sw $a0, 0xFFFF8000($a2)\n"
-      "sw $a1, 0xFFFF8004($a2)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a2\n"
       "sw $a0, 0($at)\n"
       "sw $a1, 4($at)\n";
@@ -1145,69 +1804,174 @@
 }
 
 TEST_F(AssemblerMIPSTest, StoreSToOffset) {
-  __ StoreSToOffset(mips::F0, mips::A0, 0);
-  __ StoreSToOffset(mips::F0, mips::A0, 4);
-  __ StoreSToOffset(mips::F0, mips::A0, 256);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x8000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x10000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0x12345678);
-  __ StoreSToOffset(mips::F0, mips::A0, -256);
-  __ StoreSToOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ StoreSToOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8000);
+  __ StoreSToOffset(mips::F2, mips::A0, +0);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FF8);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFB);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFC);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x7FFF);
+  __ StoreSToOffset(mips::F2, mips::A0, -0xFFF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8008);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x8001);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x8000);
+  __ StoreSToOffset(mips::F2, mips::A0, +0xFFF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FE8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF1);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF1);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF8);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FE8);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FF0);
+  __ StoreSToOffset(mips::F2, mips::A0, -0x17FE9);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FE9);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x17FF0);
+  __ StoreSToOffset(mips::F2, mips::A0, +0x12345678);
 
   const char* expected =
-      "swc1 $f0, 0($a0)\n"
-      "swc1 $f0, 4($a0)\n"
-      "swc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "swc1 $f2, -0x8000($a0)\n"
+      "swc1 $f2, 0($a0)\n"
+      "swc1 $f2, 0x7FF8($a0)\n"
+      "swc1 $f2, 0x7FFB($a0)\n"
+      "swc1 $f2, 0x7FFC($a0)\n"
+      "swc1 $f2, 0x7FFF($a0)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f2, -9($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f2, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f2, -1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 1($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f2, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "swc1 $f2, 0($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
+      "swc1 $f2, 7($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f2, 1($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f2, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
-      "swc1 $f0, -256($a0)\n"
-      "swc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "addu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n";
+      "swc1 $f2, 0($at)\n";
   DriverStr(expected, "StoreSToOffset");
 }
 
 TEST_F(AssemblerMIPSTest, StoreDToOffset) {
-  __ StoreDToOffset(mips::F0, mips::A0, 0);
-  __ StoreDToOffset(mips::F0, mips::A0, 4);
-  __ StoreDToOffset(mips::F0, mips::A0, 256);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x8000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x10000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0x12345678);
-  __ StoreDToOffset(mips::F0, mips::A0, -256);
-  __ StoreDToOffset(mips::F0, mips::A0, 0xFFFF8000);
-  __ StoreDToOffset(mips::F0, mips::A0, 0xABCDEF00);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF);
+  __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8008);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x8001);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0);
+  __ StoreDToOffset(mips::F0, mips::A0, +0x12345678);
 
   const char* expected =
+      "sdc1 $f0, -0x8000($a0)\n"
       "sdc1 $f0, 0($a0)\n"
-      "swc1 $f0, 4($a0)\n"
-      "swc1 $f1, 8($a0)\n"
-      "sdc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x8000\n"
+      "sdc1 $f0, 0x7FF8($a0)\n"
+      "swc1 $f0, 0x7FFB($a0)\n"
+      "swc1 $f1, 0x7FFF($a0)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f0, 4($at)\n"
+      "swc1 $f1, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "swc1 $f0, 7($at)\n"
+      "swc1 $f1, 11($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "sdc1 $f0, -0x10($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "swc1 $f0, -9($at)\n"
+      "swc1 $f1, -5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sdc1 $f0, -0x7FF8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "sdc1 $f0, -8($at)\n"
+      "addiu $at, $a0, -0x7FF8\n"
+      "addiu $at, $at, -0x7FF8\n"
+      "swc1 $f0, -1($at)\n"
+      "swc1 $f1, 3($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "swc1 $f0, 1($at)\n"
+      "swc1 $f1, 5($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "addiu $at, $a0, 0x7FF8\n"
+      "addiu $at, $at, 0x7FF8\n"
+      "sdc1 $f0, 0x7FF8($at)\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n"
-      "lui $at, 1\n"
+      "lui $at, 0xFFFE\n"
+      "ori $at, $at, 0x8010\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 7($at)\n"
+      "swc1 $f1, 11($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FE8\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 1($at)\n"
+      "swc1 $f1, 5($at)\n"
+      "lui $at, 0x1\n"
+      "ori $at, $at, 0x7FF0\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n"
       "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "addu $at, $at, $a0\n"
-      "sdc1 $f0, 0($at)\n"
-      "sdc1 $f0, -256($a0)\n"
-      "sdc1 $f0, 0xFFFF8000($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
+      "ori $at, $at, 0x5678\n"
       "addu $at, $at, $a0\n"
       "sdc1 $f0, 0($at)\n";
   DriverStr(expected, "StoreDToOffset");
@@ -1484,6 +2248,89 @@
   DriverStr(expected, "Bc1t");
 }
 
+///////////////////////
+// Loading Constants //
+///////////////////////
+
+TEST_F(AssemblerMIPSTest, LoadConst32) {
+  // IsUint<16>(value)
+  __ LoadConst32(mips::V0, 0);
+  __ LoadConst32(mips::V0, 65535);
+  // IsInt<16>(value)
+  __ LoadConst32(mips::V0, -1);
+  __ LoadConst32(mips::V0, -32768);
+  // Everything else
+  __ LoadConst32(mips::V0, 65536);
+  __ LoadConst32(mips::V0, 65537);
+  __ LoadConst32(mips::V0, 2147483647);
+  __ LoadConst32(mips::V0, -32769);
+  __ LoadConst32(mips::V0, -65536);
+  __ LoadConst32(mips::V0, -65537);
+  __ LoadConst32(mips::V0, -2147483647);
+  __ LoadConst32(mips::V0, -2147483648);
+
+  const char* expected =
+      // IsUint<16>(value)
+      "ori $v0, $zero, 0\n"         // __ LoadConst32(mips::V0, 0);
+      "ori $v0, $zero, 65535\n"     // __ LoadConst32(mips::V0, 65535);
+      // IsInt<16>(value)
+      "addiu $v0, $zero, -1\n"      // __ LoadConst32(mips::V0, -1);
+      "addiu $v0, $zero, -32768\n"  // __ LoadConst32(mips::V0, -32768);
+      // Everything else
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65536);
+      "lui $v0, 1\n"                // __ LoadConst32(mips::V0, 65537);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32767\n"            // __ LoadConst32(mips::V0, 2147483647);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -32769);
+      "ori $v0, 32767\n"            //                 "
+      "lui $v0, 65535\n"            // __ LoadConst32(mips::V0, -65536);
+      "lui $v0, 65534\n"            // __ LoadConst32(mips::V0, -65537);
+      "ori $v0, 65535\n"            //                 "
+      "lui $v0, 32768\n"            // __ LoadConst32(mips::V0, -2147483647);
+      "ori $v0, 1\n"                //                 "
+      "lui $v0, 32768\n";           // __ LoadConst32(mips::V0, -2147483648);
+  DriverStr(expected, "LoadConst32");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lw $v0, %lo(2f - 1b)($v1)\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lui $at, %hi(2f - 1b)\n"
+      "addu $at, $at, $v1\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
 #undef __
 
 }  // namespace art
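
The expected strings in the load/store tests above all follow one shape once an offset no longer fits a 16-bit signed immediate: the assembler first moves part of the offset into $at (one or two addiu steps, or lui/ori plus addu for far offsets) and then issues the memory instruction with a small residual offset. The sketch below is a simplified, standalone illustration of that shape only; it is not ART's actual AdjustBaseAndOffset() logic, whose split points (such as +/-0x7FF8) are also chosen so that the second word of 64-bit accesses stays in range, and the helper name and output format are invented for this example.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical helper (not ART code): reduce a 32-bit lw offset to at most two
// 16-bit adjustments plus a small residual, mirroring the expected output above.
std::vector<std::string> ExpandLwOffset(int32_t offset) {
  std::vector<std::string> out;
  auto imm = [](int64_t v) { return std::to_string(v); };
  if (offset >= -0x8000 && offset <= 0x7FFF) {
    // Case 1: fits the signed 16-bit immediate of lw directly.
    out.push_back("lw $a3, " + imm(offset) + "($a1)");
    return out;
  }
  int64_t rest = offset;
  int steps = 0;
  // Case 2: peel off +/-0x7FF8 chunks (at most two) until the rest is small.
  while (steps < 2 && (rest < -0x7FF8 || rest > 0x7FF8)) {
    int32_t chunk = rest > 0 ? 0x7FF8 : -0x7FF8;
    out.push_back(std::string("addiu $at, ") + (steps == 0 ? "$a1" : "$at") +
                  ", " + imm(chunk));
    rest -= chunk;
    ++steps;
  }
  if (rest >= -0x7FF8 && rest <= 0x7FF8) {
    out.push_back("lw $a3, " + imm(rest) + "($at)");
    return out;
  }
  // Case 3: too far even for two addiu's: build the full offset in $at with
  // lui/ori, add the base register, and load with a zero residual.
  out.clear();
  uint32_t u = static_cast<uint32_t>(offset);
  char buf[64];
  std::snprintf(buf, sizeof(buf), "lui $at, 0x%X", u >> 16);
  out.push_back(buf);
  std::snprintf(buf, sizeof(buf), "ori $at, $at, 0x%X", u & 0xFFFF);
  out.push_back(buf);
  out.push_back("addu $at, $at, $a1");
  out.push_back("lw $a3, 0($at)");
  return out;
}

int main() {
  for (int32_t offset : {0x7FF8, 0x8000, -0xFFF0, 0x17FE8, 0x12345678}) {
    std::printf("offset 0x%X:\n", offset);
    for (const std::string& insn : ExpandLwOffset(offset)) {
      std::printf("  %s\n", insn.c_str());
    }
  }
  return 0;
}

For 0x8000, for instance, this prints the single addiu/lw pair that the expected strings above encode in hex (addiu $at, $a1, 32760 followed by lw $a3, 8($at)); immediates are printed in decimal here purely for simplicity.
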
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index 5e7ed11..66204e7 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -87,70 +87,70 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class MipsManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  FRegister AsFRegister() const {
+  constexpr FRegister AsFRegister() const {
     CHECK(IsFRegister());
     return static_cast<FRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfFRegIds);
   }
 
-  FRegister AsOverlappingDRegisterLow() const {
+  constexpr FRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2);
   }
 
-  FRegister AsOverlappingDRegisterHigh() const {
+  constexpr FRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2 + 1);
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsFRegister() const {
+  constexpr bool IsFRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfFRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps FRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds);
@@ -164,32 +164,32 @@
   // then false is returned.
   bool Overlaps(const MipsManagedRegister& other) const;
 
-  static MipsManagedRegister FromCoreRegister(Register r) {
+  static constexpr MipsManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static MipsManagedRegister FromFRegister(FRegister r) {
+  static constexpr MipsManagedRegister FromFRegister(FRegister r) {
     CHECK_NE(r, kNoFRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static MipsManagedRegister FromDRegister(DRegister r) {
+  static constexpr MipsManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + kNumberOfCoreRegIds + kNumberOfFRegIds);
   }
 
-  static MipsManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr MipsManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -205,9 +205,9 @@
 
   friend class ManagedRegister;
 
-  explicit MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static MipsManagedRegister FromRegId(int reg_id) {
+  static constexpr MipsManagedRegister FromRegId(int reg_id) {
     MipsManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -218,7 +218,7 @@
 
 }  // namespace mips
 
-inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
+constexpr inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
   mips::MipsManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index ab480ca..a2621cb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -26,6 +26,11 @@
 namespace art {
 namespace mips64 {
 
+static_assert(static_cast<size_t>(kMips64PointerSize) == kMips64DoublewordSize,
+              "Unexpected Mips64 pointer size.");
+static_assert(kMips64PointerSize == PointerSize::k64, "Unexpected Mips64 pointer size.");
+
+
 void Mips64Assembler::FinalizeCode() {
   for (auto& exception_block : exception_blocks_) {
     EmitExceptionPoll(&exception_block);
@@ -1977,8 +1982,9 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& callee_save_regs,
+void Mips64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -1992,7 +1998,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2003,7 +2009,7 @@
   // Write out entry spills.
   int32_t offset = frame_size + kFramePointerSize;
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Mips64ManagedRegister reg = entry_spills.at(i).AsMips64();
+    Mips64ManagedRegister reg = entry_spills[i].AsMips64();
     ManagedRegisterSpill spill = entry_spills.at(i);
     int32_t size = spill.getSize();
     if (reg.IsNoRegister()) {
@@ -2022,7 +2028,7 @@
 }
 
 void Mips64Assembler::RemoveFrame(size_t frame_size,
-                                  const std::vector<ManagedRegister>& callee_save_regs) {
+                                  ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2030,7 +2036,7 @@
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
@@ -2109,16 +2115,16 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
+void Mips64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                               FrameOffset fr_offs,
+                                               ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2135,9 +2141,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
-                                       ThreadOffset<kMips64DoublewordSize> src,
-                                       size_t size) {
+void Mips64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2170,8 +2174,7 @@
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
 }
 
-void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<kMips64DoublewordSize> offs) {
+void Mips64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2215,18 +2218,18 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<kMips64DoublewordSize> thr_offs,
-                                             ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                           ThreadOffset64 thr_offs,
+                                           ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
@@ -2428,8 +2431,8 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset ATTRIBUTE_UNUSED,
-                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void Mips64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
@@ -2448,7 +2451,7 @@
   LoadFromOffset(kLoadDoubleword,
                  scratch.AsGpuRegister(),
                  S1,
-                 Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value());
+                 Thread::ExceptionOffset<kMips64PointerSize>().Int32Value());
   Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
@@ -2465,7 +2468,7 @@
   LoadFromOffset(kLoadDoubleword,
                  T9,
                  S1,
-                 QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pDeliverException).Int32Value());
+                 QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 8acc38a..a7d350c 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -20,12 +20,14 @@
 #include <utility>
 #include <vector>
 
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -100,7 +102,7 @@
   DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
 };
 
-class Mips64Assembler FINAL : public Assembler {
+class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
   explicit Mips64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
@@ -118,6 +120,9 @@
     }
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   // Emit Machine Instructions.
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -365,13 +370,13 @@
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size,
-                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
@@ -383,10 +388,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -394,9 +400,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest,
-                        ThreadOffset<kMips64DoublewordSize> src,
-                        size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -405,18 +409,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest,
-                              ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -471,8 +476,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset,
-                        ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 1d36128..c9f9556 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -39,22 +39,22 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
  public:
-  GpuRegister AsGpuRegister() const {
+  constexpr GpuRegister AsGpuRegister() const {
     CHECK(IsGpuRegister());
     return static_cast<GpuRegister>(id_);
   }
 
-  FpuRegister AsFpuRegister() const {
+  constexpr FpuRegister AsFpuRegister() const {
     CHECK(IsFpuRegister());
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
-  bool IsGpuRegister() const {
+  constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
   }
 
-  bool IsFpuRegister() const {
+  constexpr bool IsFpuRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfGpuRegIds;
     return (0 <= test) && (test < kNumberOfFpuRegIds);
@@ -67,22 +67,22 @@
   // then false is returned.
   bool Overlaps(const Mips64ManagedRegister& other) const;
 
-  static Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
+  static constexpr Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
     CHECK_NE(r, kNoGpuRegister);
     return FromRegId(r);
   }
 
-  static Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
+  static constexpr Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
     CHECK_NE(r, kNoFpuRegister);
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -98,9 +98,9 @@
 
   friend class ManagedRegister;
 
-  explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Mips64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Mips64ManagedRegister FromRegId(int reg_id) {
     Mips64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -111,7 +111,7 @@
 
 }  // namespace mips64
 
-inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
+constexpr inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
   mips64::Mips64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
new file mode 100644
index 0000000..0fd9e5b
--- /dev/null
+++ b/compiler/utils/string_reference_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "string_reference.h"
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "utils/test_dex_file_builder.h"
+
+namespace art {
+
+TEST(StringReference, ValueComparator) {
+  // This is a regression test for the StringReferenceValueComparator using the wrong
+  // dex file to get the string data from a StringId. We construct two dex files with
+  // just a single string with the same length but different value. This creates dex
+  // files that have the same layout, so the byte offset read from the StringId in one
+  // dex file, when used in the other dex file, still points to valid string data, except
+  // that it's the wrong string. Without the fix the strings would then compare equal.
+  TestDexFileBuilder builder1;
+  builder1.AddString("String1");
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(1u, dex_file1->NumStringIds());
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
+  StringReference sr1(dex_file1.get(), 0);
+
+  TestDexFileBuilder builder2;
+  builder2.AddString("String2");
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(1u, dex_file2->NumStringIds());
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
+  StringReference sr2(dex_file2.get(), 0);
+
+  StringReferenceValueComparator cmp;
+  EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
+  EXPECT_FALSE(cmp(sr2, sr1));  // "String2" < "String1" is false.
+}
+
+TEST(StringReference, ValueComparator2) {
+  const char* const kDexFile1Strings[] = {
+      "",
+      "abc",
+      "abcxyz",
+  };
+  const char* const kDexFile2Strings[] = {
+      "a",
+      "abc",
+      "abcdef",
+      "def",
+  };
+  const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = {
+      { true, true, true, true },
+      { false, false, true, true },
+      { false, false, false, true },
+  };
+  const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = {
+      { false, true, true },
+      { false, false, true },
+      { false, false, true },
+      { false, false, false },
+  };
+
+  TestDexFileBuilder builder1;
+  for (const char* s : kDexFile1Strings) {
+    builder1.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
+    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+  }
+
+  TestDexFileBuilder builder2;
+  for (const char* s : kDexFile2Strings) {
+    builder2.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
+    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+  }
+
+  StringReferenceValueComparator cmp;
+  for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
+    for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
+      StringReference sr1(dex_file1.get(), index1);
+      StringReference sr2(dex_file2.get(), index2);
+      EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
+      EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
+    }
+  }
+}
+
+}  // namespace art
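For reference, the comparison pattern that the ValueComparator regression test above guards can be sketched as follows. This is an illustrative sketch only, not part of the change: the SimpleStringRef and SimpleStringRefComparator names are invented here, and the real StringReferenceValueComparator compares the modified-UTF-8 data as UTF-16 code point values rather than with strcmp. The key point is that each string index is resolved against its own dex file.

    // Sketch only (assumes the same includes as string_reference_test.cc above).
    #include <cstring>

    namespace art {

    struct SimpleStringRef {  // invented for illustration
      const DexFile* dex_file;
      uint32_t string_index;
    };

    struct SimpleStringRefComparator {
      bool operator()(const SimpleStringRef& lhs, const SimpleStringRef& rhs) const {
        // Resolving both indexes through lhs.dex_file would be exactly the bug the
        // test describes: same layout, valid offset, wrong string.
        const char* lhs_data =
            lhs.dex_file->GetStringData(lhs.dex_file->GetStringId(lhs.string_index));
        const char* rhs_data =
            rhs.dex_file->GetStringData(rhs.dex_file->GetStringId(rhs.string_index));
        return strcmp(lhs_data, rhs_data) < 0;  // ASCII-only simplification.
      }
    };

    }  // namespace art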
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index fb37804..6921780 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -227,9 +227,18 @@
     // Write the complete header again, just simpler that way.
     std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
+    static constexpr bool kVerify = false;
+    static constexpr bool kVerifyChecksum = false;
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
-        &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, false, &error_msg));
+        &dex_file_data_[0],
+        dex_file_data_.size(),
+        dex_location,
+        0u,
+        nullptr,
+        kVerify,
+        kVerifyChecksum,
+        &error_msg));
     CHECK(dex_file != nullptr) << error_msg;
     return dex_file;
   }
diff --git a/compiler/utils/transform_array_ref.h b/compiler/utils/transform_array_ref.h
new file mode 100644
index 0000000..a6da34f
--- /dev/null
+++ b/compiler/utils/transform_array_ref.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+
+#include <type_traits>
+
+#include "utils/array_ref.h"
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+/**
+ * @brief An ArrayRef<> wrapper that uses a transformation function for element access.
+ */
+template <typename BaseType, typename Function>
+class TransformArrayRef {
+ private:
+  using Iter = TransformIterator<typename ArrayRef<BaseType>::iterator, Function>;
+
+  // The Function may take a non-const reference, so const_iterator may not exist.
+  using FallbackConstIter = std::iterator<std::random_access_iterator_tag, void, void, void, void>;
+  using PreferredConstIter =
+      TransformIterator<typename ArrayRef<BaseType>::const_iterator, Function>;
+  template <typename F, typename = typename std::result_of<F(const BaseType&)>::type>
+  static PreferredConstIter ConstIterHelper(int&);
+  template <typename F>
+  static FallbackConstIter ConstIterHelper(const int&);
+
+  using ConstIter = decltype(ConstIterHelper<Function>(*reinterpret_cast<int*>(0)));
+
+ public:
+  using value_type = typename Iter::value_type;
+  using reference = typename Iter::reference;
+  using const_reference = typename ConstIter::reference;
+  using pointer = typename Iter::pointer;
+  using const_pointer = typename ConstIter::pointer;
+  using iterator = Iter;
+  using const_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      ConstIter>::type;
+  using reverse_iterator = std::reverse_iterator<Iter>;
+  using const_reverse_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      std::reverse_iterator<ConstIter>>::type;
+  using difference_type = typename ArrayRef<BaseType>::difference_type;
+  using size_type = typename ArrayRef<BaseType>::size_type;
+
+  // Constructors.
+
+  TransformArrayRef(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT>
+  TransformArrayRef(const ArrayRef<OtherBT>& base, Function fn)
+      : data_(base, fn) { }
+
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef(const TransformArrayRef<OtherBT, Function>& other)
+      : TransformArrayRef(other.base(), other.GetFunction()) { }
+
+  // Assignment operators.
+
+  TransformArrayRef& operator=(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef& operator=(const TransformArrayRef<OtherBT, Function>& other) {
+    return *this = TransformArrayRef(other.base(), other.GetFunction());
+  }
+
+  // Destructor.
+  ~TransformArrayRef() = default;
+
+  // Iterators.
+  iterator begin() { return MakeIterator(base().begin()); }
+  const_iterator begin() const { return MakeIterator(base().cbegin()); }
+  const_iterator cbegin() const { return MakeIterator(base().cbegin()); }
+  iterator end() { return MakeIterator(base().end()); }
+  const_iterator end() const { return MakeIterator(base().cend()); }
+  const_iterator cend() const { return MakeIterator(base().cend()); }
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+  const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
+
+  // Size.
+  size_type size() const { return base().size(); }
+  bool empty() const { return base().empty(); }
+
+  // Element access. NOTE: Not providing data().
+
+  reference operator[](size_type n) { return GetFunction()(base()[n]); }
+  const_reference operator[](size_type n) const { return GetFunction()(base()[n]); }
+
+  reference front() { return GetFunction()(base().front()); }
+  const_reference front() const { return GetFunction()(base().front()); }
+
+  reference back() { return GetFunction()(base().back()); }
+  const_reference back() const { return GetFunction()(base().back()); }
+
+  TransformArrayRef SubArray(size_type pos) {
+    return TransformArrayRef(base().subarray(pos), GetFunction());
+  }
+  TransformArrayRef SubArray(size_type pos) const {
+    return TransformArrayRef(base().subarray(pos), GetFunction());
+  }
+  TransformArrayRef SubArray(size_type pos, size_type length) const {
+    return TransformArrayRef(base().subarray(pos, length), GetFunction());
+  }
+
+  // Retrieve the base ArrayRef<>.
+  ArrayRef<BaseType> base() {
+    return data_.base_;
+  }
+  ArrayRef<const BaseType> base() const {
+    return ArrayRef<const BaseType>(data_.base_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
+  struct Data : Function {
+   public:
+    Data(ArrayRef<BaseType> base, Function fn) : Function(fn), base_(base) { }
+
+    ArrayRef<BaseType> base_;
+  };
+
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+  template <typename BaseIterator>
+  auto MakeIterator(BaseIterator base) const {
+    return MakeTransformIterator(base, GetFunction());
+  }
+
+  Data data_;
+
+  template <typename OtherBT, typename OtherFunction>
+  friend class TransformArrayRef;
+};
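The private Data struct above derives from Function instead of storing it as a member so that the empty base optimization applies: with a state-less functor (for example a capture-less lambda), the functor contributes no storage and the wrapper stays as small as the underlying ArrayRef<>. A minimal stand-alone illustration of the effect; the names below are invented for the sketch and are not part of this change.

    struct StatelessFn {};  // stands in for a capture-less lambda / empty functor

    template <typename T, typename Fn>
    struct WithMember {
      T base;
      Fn fn;  // a member of an empty type still occupies at least one byte (plus padding)
    };

    template <typename T, typename Fn>
    struct WithEmptyBase : Fn {  // the empty base contributes no storage
      T base;
    };

    static_assert(sizeof(WithEmptyBase<void*, StatelessFn>) == sizeof(void*),
                  "empty base optimization: the functor adds no storage");
    static_assert(sizeof(WithMember<void*, StatelessFn>) > sizeof(void*),
                  "storing the functor as a member costs space");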
+
+template <typename BaseType, typename Function>
+bool operator==(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+
+template <typename BaseType, typename Function>
+bool operator!=(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename ValueType, typename Function>
+TransformArrayRef<ValueType, Function> MakeTransformArrayRef(
+    ArrayRef<ValueType> container, Function f) {
+  return TransformArrayRef<ValueType, Function>(container, f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<typename Container::value_type, Function> MakeTransformArrayRef(
+    Container& container, Function f) {
+  return TransformArrayRef<typename Container::value_type, Function>(
+      ArrayRef<typename Container::value_type>(container.data(), container.size()), f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<const typename Container::value_type, Function> MakeTransformArrayRef(
+    const Container& container, Function f) {
+  return TransformArrayRef<const typename Container::value_type, Function>(
+      ArrayRef<const typename Container::value_type>(container.data(), container.size()), f);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
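The ConstIterHelper overload pair in TransformArrayRef is a small detection idiom: the int& parameter makes the SFINAE-constrained overload the better match for an l-value argument, so the preferred const_iterator is picked exactly when Function is callable with a const BaseType&; otherwise the fallback wins and const_iterator collapses to void. Below is a stand-alone sketch of the same idiom with invented names, assuming a SFINAE-friendly std::result_of (C++14); the behavior in context is exercised by transform_array_ref_test.cc further down.

    #include <type_traits>
    #include <utility>

    struct ConstCallable {};     // selected when F(const T&) is well-formed
    struct NotConstCallable {};  // selected otherwise

    template <typename F, typename T, typename = typename std::result_of<F(const T&)>::type>
    ConstCallable Detect(int&);           // preferred: exact match for an l-value int
    template <typename F, typename T>
    NotConstCallable Detect(const int&);  // fallback: needs the added const

    template <typename F, typename T>
    using DetectResult = decltype(Detect<F, T>(std::declval<int&>()));

    struct Value { int v; };
    struct Reader { int operator()(const Value& x) const { return x.v; } };
    struct Writer { int& operator()(Value& x) const { return x.v; } };

    static_assert(std::is_same<DetectResult<Reader, Value>, ConstCallable>::value,
                  "Reader accepts a const Value&, so the preferred overload wins");
    static_assert(std::is_same<DetectResult<Writer, Value>, NotConstCallable>::value,
                  "Writer needs a mutable Value&, so the fallback is used");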
diff --git a/compiler/utils/transform_array_ref_test.cc b/compiler/utils/transform_array_ref_test.cc
new file mode 100644
index 0000000..8d71fd7
--- /dev/null
+++ b/compiler/utils/transform_array_ref_test.cc
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_array_ref.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+ATTRIBUTE_UNUSED bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+TEST(TransformArrayRef, ConstRefAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 6, 4, 0 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, add1);
+  using TarefIter = decltype(taref)::iterator;
+  using ConstTarefIter = decltype(taref)::const_iterator;
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<ConstTarefIter, decltype(taref)::const_pointer>::value,
+                "const_pointer");
+  static_assert(std::is_same<int, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value + 1, taref.front());
+  ASSERT_EQ(input.back().value + 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, NonConstRefSub1) {
+  auto sub1 = [](ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 4, 4, 5, 7, 10 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, sub1);
+  using TarefIter = decltype(taref)::iterator;
+  static_assert(std::is_same<void, decltype(taref)::const_iterator>::value, "const_iterator");
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<void, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<void, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 4, 6, 9 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 6, 4, 3, 3 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value - 1, taref.front());
+  ASSERT_EQ(input.back().value - 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value - 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, ConstAndNonConstRef) {
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 1, 0, 1, 0, 3, 1 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, ref);
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(taref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value, taref.front());
+  ASSERT_EQ(input.back().value, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, taref[i]);
+  }
+
+  // Test writing through the transform iterator.
+  std::vector<int> transform_input({ 24, 37, 11, 71 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  taref = MakeTransformArrayRef(transformed, ref);
+  for (size_t i = 0; i != transform_input.size(); ++i) {
+    taref[i] = transform_input[i];
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 24, 37, 11, 71 }), transformed);
+
+  const std::vector<ValueHolder>& cinput = input;
+
+  auto ctaref = MakeTransformArrayRef(cinput, ref);
+  static_assert(std::is_same<int, decltype(ctaref)::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, decltype(ctaref)::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(ctaref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(ctaref.begin(), ctaref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.cbegin(), ctaref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.rbegin(), ctaref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.crbegin(), ctaref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(cinput.size(), ctaref.size());
+  ASSERT_EQ(cinput.empty(), ctaref.empty());
+  ASSERT_EQ(cinput.front().value, ctaref.front());
+  ASSERT_EQ(cinput.back().value, ctaref.back());
+
+  for (size_t i = 0; i != cinput.size(); ++i) {
+    ASSERT_EQ(cinput[i].value, ctaref[i]);
+  }
+
+  // Test conversion adding const.
+  decltype(ctaref) ctaref2 = taref;
+  ASSERT_EQ(taref.size(), ctaref2.size());
+  for (size_t i = 0; i != taref.size(); ++i) {
+    ASSERT_EQ(taref[i], ctaref2[i]);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/transform_iterator.h b/compiler/utils/transform_iterator.h
new file mode 100644
index 0000000..3bc9046
--- /dev/null
+++ b/compiler/utils/transform_iterator.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+
+#include <iterator>
+#include <type_traits>
+
+#include "base/iteration_range.h"
+
+namespace art {
+
+// The transform iterator transforms values from the base iterator with a given
+// transformation function. It can serve as a replacement for std::transform(), i.e.
+//    std::copy(MakeTransformIterator(begin, f), MakeTransformIterator(end, f), out)
+// is equivalent to
+//    std::transform(begin, end, out, f)
+// If the function returns an l-value reference or a wrapper that supports assignment,
+// the TransformIterator can be used also as an output iterator, i.e.
+//    std::copy(begin, end, MakeTransformIterator(out, f))
+// is equivalent to
+//    for (auto it = begin; it != end; ++it) {
+//      f(*out++) = *it;
+//    }
+template <typename BaseIterator, typename Function>
+class TransformIterator {
+ private:
+  static_assert(std::is_base_of<
+                    std::input_iterator_tag,
+                    typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+                "Transform iterator base must be an input iterator.");
+
+  using InputType = typename std::iterator_traits<BaseIterator>::reference;
+  using ResultType = typename std::result_of<Function(InputType)>::type;
+
+ public:
+  using iterator_category = typename std::iterator_traits<BaseIterator>::iterator_category;
+  using value_type =
+      typename std::remove_const<typename std::remove_reference<ResultType>::type>::type;
+  using difference_type = typename std::iterator_traits<BaseIterator>::difference_type;
+  using pointer = typename std::conditional<
+      std::is_reference<ResultType>::value,
+      typename std::add_pointer<typename std::remove_reference<ResultType>::type>::type,
+      TransformIterator>::type;
+  using reference = ResultType;
+
+  TransformIterator(BaseIterator base, Function fn)
+      : data_(base, fn) { }
+
+  template <typename OtherBI>
+  TransformIterator(const TransformIterator<OtherBI, Function>& other)
+      : data_(other.base(), other.GetFunction()) {
+  }
+
+  TransformIterator& operator++() {
+    ++data_.base_;
+    return *this;
+  }
+
+  TransformIterator operator++(int) {
+    TransformIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+
+  TransformIterator& operator--() {
+    static_assert(
+        std::is_base_of<std::bidirectional_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be bidirectional iterator to use operator--()");
+    --data_.base_;
+    return *this;
+  }
+
+  TransformIterator operator--(int) {
+    TransformIterator tmp(*this);
+    --*this;
+    return tmp;
+  }
+
+  reference operator*() const {
+    return GetFunction()(*base());
+  }
+
+  reference operator[](difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator[]");
+    return GetFunction()(base()[n]);
+  }
+
+  TransformIterator operator+(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator+");
+    return TransformIterator(base() + n, GetFunction());
+  }
+
+  TransformIterator operator-(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return TransformIterator(base() - n, GetFunction());
+  }
+
+  difference_type operator-(const TransformIterator& other) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return base() - other.base();
+  }
+
+  // Retrieve the base iterator.
+  BaseIterator base() const {
+    return data_.base_;
+  }
+
+  // Retrieve the transformation function.
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
+  struct Data : Function {
+   public:
+    Data(BaseIterator base, Function fn) : Function(fn), base_(base) { }
+
+    BaseIterator base_;
+  };
+
+  Data data_;
+};
+
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator==(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return lhs.base() == rhs.base();
+}
+
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator!=(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename BaseIterator, typename Function>
+TransformIterator<BaseIterator, Function> MakeTransformIterator(BaseIterator base, Function f) {
+  return TransformIterator<BaseIterator, Function>(base, f);
+}
+
+template <typename BaseRange, typename Function>
+auto MakeTransformRange(BaseRange& range, Function f) {
+  return MakeIterationRange(MakeTransformIterator(range.begin(), f),
+                            MakeTransformIterator(range.end(), f));
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
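To make the equivalence stated in the header comment concrete, here is a small stand-alone sketch of both directions, reading through a transform iterator and writing through one. It is illustrative only (Holder, GetValueRef and TransformIteratorSketch are invented names); the real coverage for this change is the test file added below.

    #include <algorithm>
    #include <iterator>
    #include <vector>

    #include "utils/transform_iterator.h"

    namespace art {

    struct Holder { int value; };
    struct GetValueRef {
      int& operator()(Holder& h) const { return h.value; }
    };

    void TransformIteratorSketch() {
      std::vector<Holder> in = { {1}, {2}, {3} };
      std::vector<int> out;

      // Reading direction: equivalent to
      //   std::transform(in.begin(), in.end(), std::back_inserter(out), get);
      auto get = [](const Holder& h) { return h.value; };  // NOLINT [readability/braces]
      std::copy(MakeTransformIterator(in.begin(), get),
                MakeTransformIterator(in.end(), get),
                std::back_inserter(out));  // out == {1, 2, 3}

      // Writing direction: the function returns an l-value reference, so the
      // transform iterator over dst.begin() can be used as an output iterator.
      std::vector<Holder> dst(out.size(), Holder{0});
      std::copy(out.begin(), out.end(), MakeTransformIterator(dst.begin(), GetValueRef()));
      // Now dst[i].value == out[i] for all i.
    }

    }  // namespace art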
diff --git a/compiler/utils/transform_iterator_test.cc b/compiler/utils/transform_iterator_test.cc
new file mode 100644
index 0000000..57ff0a6
--- /dev/null
+++ b/compiler/utils/transform_iterator_test.cc
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <forward_list>
+#include <list>
+#include <type_traits>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+TEST(TransformIterator, VectorAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 1, 7, 3, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_titer, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_ctiter, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_rtiter, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_crtiter, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), add1),
+            MakeTransformIterator(input.end(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), add1),
+            MakeTransformIterator(input.cend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), add1),
+            MakeTransformIterator(input.rend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), add1),
+            MakeTransformIterator(input.crend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.begin(), add1)[i]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cbegin(), add1)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rbegin(), add1)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crbegin(), add1)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.end(), add1)[index_from_end]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cend(), add1)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rend(), add1)[index_from_rend]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crend(), add1)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), add1) + i,
+              MakeTransformIterator(input.begin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), add1) + i,
+              MakeTransformIterator(input.cbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), add1) + i,
+              MakeTransformIterator(input.rbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), add1) + i,
+              MakeTransformIterator(input.crbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.end(), add1) - i,
+              MakeTransformIterator(input.end() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), add1) - i,
+              MakeTransformIterator(input.cend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), add1) - i,
+              MakeTransformIterator(input.rend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), add1) - i,
+              MakeTransformIterator(input.crend() - i, add1));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), add1) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), add1) - MakeTransformIterator(input.begin(), add1),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), add1);
+  decltype(MakeTransformIterator(input.cbegin(), add1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), add1);
+  decltype(MakeTransformIterator(input.crbegin(), add1)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+}
+
+TEST(TransformIterator, ListSub1) {
+  auto sub1 = [](const ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::list<ValueHolder> input({ 2, 3, 5, 7, 11 });
+  std::vector<int> output;
+
+  using list_titer = decltype(MakeTransformIterator(input.begin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<list_titer, list_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_titer::reference>::value, "reference");
+
+  using list_ctiter = decltype(MakeTransformIterator(input.cbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_ctiter, list_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_ctiter::reference>::value, "reference");
+
+  using list_rtiter = decltype(MakeTransformIterator(input.rbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_rtiter, list_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_rtiter::reference>::value, "reference");
+
+  using list_crtiter = decltype(MakeTransformIterator(input.crbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_crtiter, list_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), sub1),
+            MakeTransformIterator(input.end(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), sub1),
+            MakeTransformIterator(input.cend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), sub1),
+            MakeTransformIterator(input.rend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), sub1),
+            MakeTransformIterator(input.crend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1 }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), sub1);
+  decltype(MakeTransformIterator(input.cbegin(), sub1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+TEST(TransformIterator, ForwardListMul3) {
+  auto mul3 = [](const ValueHolder& h) { return h.value * 3; };  // NOLINT [readability/braces]
+  std::forward_list<ValueHolder> input({ 1, 1, 2, 3, 5, 8 });
+  std::vector<int> output;
+
+  using flist_titer = decltype(MakeTransformIterator(input.begin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_titer, flist_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_titer::reference>::value, "reference");
+
+  using flist_ctiter = decltype(MakeTransformIterator(input.cbegin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_ctiter, flist_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_ctiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), mul3),
+            MakeTransformIterator(input.end(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), mul3),
+            MakeTransformIterator(input.cend(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), mul3);
+  decltype(MakeTransformIterator(input.cbegin(), mul3)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+TEST(TransformIterator, VectorConstReference) {
+  auto ref = [](const ValueHolder& h) -> const int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
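+  // Exercise random access: operator[] with negative offsets indexes backwards from end()/rend(),
+  // and iterator +/- arithmetic must match adjusting the underlying iterator first.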
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+}
+
+TEST(TransformIterator, VectorNonConstReference) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 1, -1, 2, -2, 3, -3 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -2 * v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -2, 2, -4, 4, -6, 6 }), transformed);
+}
+
+TEST(TransformIterator, VectorConstAndNonConstReference) {
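+  // The functor is overloaded on constness, so the adapter's pointer/reference types depend on
+  // whether the underlying iterator is const (checked by the static_asserts below).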
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  // static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  // static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), ref);
+  decltype(MakeTransformIterator(input.cbegin(), ref)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), ref);
+  decltype(MakeTransformIterator(input.crbegin(), ref)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 42, 73, 11, 17 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -42, -73, -11, -17 }), transformed);
+}
+
+TEST(TransformIterator, TransformRange) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> data({ 1, 0, 1, 3, 1, 0 });
+
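+  // The range adapter yields int& via ref, so this loop mutates the underlying ValueHolders.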
+  for (int& v : MakeTransformRange(data, ref)) {
+    v += 11;
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 12, 11, 12, 14, 12, 11 }), data);
+}
+
+}  // namespace art
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
new file mode 100644
index 0000000..d0c1656
--- /dev/null
+++ b/compiler/utils/type_reference.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "string_reference.h"
+
+namespace art {
+
+class DexFile;
+
+// A type is located by its DexFile and the type_ids_ table index into that DexFile.
+struct TypeReference {
+  TypeReference(const DexFile* file, uint32_t index) : dex_file(file), type_index(index) { }
+
+  const DexFile* dex_file;
+  uint32_t type_index;
+};
+
+// Compare the actual referenced type names. Used for type reference deduplication.
+struct TypeReferenceValueComparator {
+  bool operator()(TypeReference tr1, TypeReference tr2) const {
+    // Note that we want to deduplicate identical boot image types even if they are
+    // referenced by different dex files, so we simply compare the descriptors.
+    StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_);
+    StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_);
+    return StringReferenceValueComparator()(sr1, sr2);
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TYPE_REFERENCE_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 2203646..f2ef41f 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -727,6 +727,14 @@
 }
 
 
+void X86Assembler::comiss(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
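+  // COMISS xmm, m32 is encoded as 0F 2F /r.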
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::comisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -736,6 +744,15 @@
 }
 
 
+void X86Assembler::comisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
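+  // COMISD xmm, m64 is encoded as 66 0F 2F /r (the 0x66 prefix selects the double-precision form).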
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::ucomiss(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1030,6 +1047,14 @@
 }
 
 
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
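+  // CMP r/m8, imm8 is encoded as 80 /7 ib; only the low byte of the immediate is emitted.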
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1123,6 +1148,23 @@
 }
 
 
+void X86Assembler::testb(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
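+  // TEST r/m8, imm8 is encoded as F6 /0 ib; EAX (register number 0) merely supplies the /0 reg field.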
+  EmitUint8(0xF6);
+  EmitOperand(EAX, dst);
+  CHECK(imm.is_int8());
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
+void X86Assembler::testl(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
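+  // TEST r/m32, imm32 is encoded as F7 /0 id.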
+  EmitUint8(0xF7);
+  EmitOperand(0, dst);
+  EmitImmediate(imm);
+}
+
+
 void X86Assembler::andl(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x23);
@@ -1918,489 +1960,6 @@
   EmitOperand(reg_or_opcode, operand);
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86Core(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 4;
-
-void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
-    pushl(spill);
-    gpr_count++;
-    cfi_.AdjustCFAOffset(kFramePointerSize);
-    cfi_.RelOffset(DWARFReg(spill), 0);
-  }
-
-  // return address then method on stack.
-  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
-      kFramePointerSize /*method*/ -
-      kFramePointerSize /*return address*/;
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-  pushl(method_reg.AsX86().AsCpuRegister());
-  cfi_.AdjustCFAOffset(kFramePointerSize);
-  DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size);
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  // -kFramePointerSize for ArtMethod*.
-  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
-    popl(spill);
-    cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-    cfi_.Restore(DWARFReg(spill));
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(-adjust));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addl(ESP, Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister src = msrc.AsX86();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(Address(ESP, offs), src.AsCpuRegister());
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(Address(ESP, offs), src.AsRegisterPairLow());
-    movl(Address(ESP, FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(ESP, offs));
-    } else {
-      fstpl(Address(ESP, offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(ESP, offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(ESP, offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86ManagedRegister src = msrc.AsX86();
-  CHECK(src.IsCpuRegister());
-  movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
-void X86Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                         ManagedRegister) {
-  movl(Address(ESP, dest), Immediate(imm));
-}
-
-void X86Assembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                          ManagedRegister) {
-  fs()->movl(Address::Absolute(dest), Immediate(imm));
-}
-
-void X86Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                            FrameOffset fr_offs,
-                                            ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
-  fs()->movl(Address::Absolute(thr_offs), ESP);
-}
-
-void X86Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    movl(dest.AsCpuRegister(), Address(ESP, src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    movl(dest.AsRegisterPairLow(), Address(ESP, src));
-    movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(ESP, src));
-    } else {
-      fldl(Address(ESP, src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(ESP, src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(ESP, src));
-    }
-  }
-}
-
-void X86Assembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
-    fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset<4>(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      fs()->flds(Address::Absolute(src));
-    } else {
-      fs()->fldl(Address::Absolute(src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
-    } else {
-      fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
-    }
-  }
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(ESP, src));
-}
-
-void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool unpoison_reference) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-}
-
-void X86Assembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                        ThreadOffset<4> offs) {
-  X86ManagedRegister dest = mdest.AsX86();
-  CHECK(dest.IsCpuRegister());
-  fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
-}
-
-void X86Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86ManagedRegister reg = mreg.AsX86();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86ManagedRegister dest = mdest.AsX86();
-  X86ManagedRegister src = msrc.AsX86();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movl(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(ESP, Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(ESP, 0));
-        movss(dest.AsXmmRegister(), Address(ESP, 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(ESP, 0));
-        movsd(dest.AsXmmRegister(), Address(ESP, 0));
-      }
-      addl(ESP, Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86Assembler::CopyRef(FrameOffset dest, FrameOffset src,
-                           ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(ESP, src));
-  movl(Address(ESP, dest), scratch.AsCpuRegister());
-}
-
-void X86Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                        ThreadOffset<4> thr_offs,
-                                        ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
-}
-
-void X86Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                      FrameOffset fr_offs,
-                                      ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 4);
-  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushl(Address(ESP, src));
-  popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movl(scratch, Address(ESP, src_base));
-  movl(scratch, Address(scratch, src_offset));
-  movl(Address(ESP, dest), scratch);
-}
-
-void X86Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
-  popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movl(scratch, Address(ESP, src));
-  pushl(Address(scratch, src_offset));
-  popl(Address(scratch, dest_offset));
-}
-
-void X86Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-}
-
-void X86Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-  }
-  Store(out_off, scratch, 4);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86ManagedRegister out_reg = mout_reg.AsX86();
-  X86ManagedRegister in_reg = min_reg.AsX86();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86ManagedRegister base = mbase.AsX86();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  movl(scratch, Address(ESP, base));
-  call(Address(scratch, offset));
-}
-
-void X86Assembler::CallFromThread32(ThreadOffset<4> offset, ManagedRegister /*mscratch*/) {
-  fs()->call(Address::Absolute(offset));
-}
-
-void X86Assembler::GetCurrentThread(ManagedRegister tr) {
-  fs()->movl(tr.AsX86().AsCpuRegister(),
-             Address::Absolute(Thread::SelfOffset<4>()));
-}
-
-void X86Assembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<4>()));
-  movl(Address(ESP, offset), scratch.AsCpuRegister());
-}
-
-void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86ExceptionSlowPath* slow = new (GetArena()) X86ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<4>()), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in EAX
-  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<4>()));
-  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException)));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86Assembler::AddConstantArea() {
   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   // Generate the data for the literal area.
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8567ad2..2ddcd76 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -21,6 +21,7 @@
 
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
@@ -195,7 +196,7 @@
     return result;
   }
 
-  static Address Absolute(ThreadOffset<4> addr) {
+  static Address Absolute(ThreadOffset32 addr) {
     return Absolute(addr.Int32Value());
   }
 
@@ -423,7 +424,9 @@
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
+  void comiss(XmmRegister a, const Address& b);
   void comisd(XmmRegister a, XmmRegister b);
+  void comisd(XmmRegister a, const Address& b);
   void ucomiss(XmmRegister a, XmmRegister b);
   void ucomiss(XmmRegister a, const Address& b);
   void ucomisd(XmmRegister a, XmmRegister b);
@@ -479,6 +482,7 @@
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(Register reg, const Immediate& imm);
@@ -492,6 +496,9 @@
   void testl(Register reg, const Immediate& imm);
   void testl(Register reg1, const Address& address);
 
+  void testb(const Address& dst, const Immediate& imm);
+  void testl(const Address& dst, const Immediate& imm);
+
   void andl(Register dst, const Immediate& imm);
   void andl(Register dst, Register src);
   void andl(Register dst, const Address& address);
@@ -628,123 +635,6 @@
   void Bind(NearLabel* label);
 
   //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
-  //
   // Heap poisoning.
   //
 
@@ -752,6 +642,12 @@
   void PoisonHeapReference(Register reg) { negl(reg); }
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(Register reg) { negl(reg); }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(Register reg) {
     if (kPoisonHeapReferences) {
@@ -841,15 +737,6 @@
   EmitUint8(0x66);
 }
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
 }  // namespace x86
 }  // namespace art
 
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1d1df6e..61d70d7 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,18 +322,43 @@
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }
 
+TEST_F(AssemblerX86Test, ComissAddr) {
+  GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "comiss 0(%EAX), %xmm0\n";
+  DriverStr(expected, "comiss");
+}
+
 TEST_F(AssemblerX86Test, UComissAddr) {
   GetAssembler()->ucomiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
   const char* expected = "ucomiss 0(%EAX), %xmm0\n";
   DriverStr(expected, "ucomiss");
 }
 
+TEST_F(AssemblerX86Test, ComisdAddr) {
+  GetAssembler()->comisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "comisd 0(%EAX), %xmm0\n";
+  DriverStr(expected, "comisd");
+}
+
 TEST_F(AssemblerX86Test, UComisdAddr) {
   GetAssembler()->ucomisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
   const char* expected = "ucomisd 0(%EAX), %xmm0\n";
   DriverStr(expected, "ucomisd");
 }
 
+TEST_F(AssemblerX86Test, RoundSS) {
+  GetAssembler()->roundss(
+      x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1));
+  const char* expected = "roundss $1, %xmm1, %xmm0\n";
+  DriverStr(expected, "roundss");
+}
+
+TEST_F(AssemblerX86Test, RoundSD) {
+  GetAssembler()->roundsd(
+      x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1));
+  const char* expected = "roundsd $1, %xmm1, %xmm0\n";
+  DriverStr(expected, "roundsd");
+}
 
 TEST_F(AssemblerX86Test, CmovlAddress) {
   GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EAX), x86::Address(
@@ -350,6 +375,42 @@
   DriverStr(expected, "cmovl_address");
 }
 
+TEST_F(AssemblerX86Test, TestbAddressImmediate) {
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Immediate(1));
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
+      x86::Immediate(-128));
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
+      x86::Immediate(127));
+  const char* expected =
+      "testb $1, 0xc(%EDI,%EBX,4)\n"
+      "testb $-128, 0x7(%ESP)\n"
+      "testb $127, 0x82(%EBX)\n";
+
+  DriverStr(expected, "TestbAddressImmediate");
+}
+
+TEST_F(AssemblerX86Test, TestlAddressImmediate) {
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Immediate(1));
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
+      x86::Immediate(-100000));
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
+      x86::Immediate(77777777));
+  const char* expected =
+      "testl $1, 0xc(%EDI,%EBX,4)\n"
+      "testl $-100000, 0x7(%ESP)\n"
+      "testl $77777777, 0x82(%EBX)\n";
+
+  DriverStr(expected, "TestlAddressImmediate");
+}
+
 /////////////////
 // Near labels //
 /////////////////
@@ -389,4 +450,10 @@
   DriverStr(expected, "near_label");
 }
 
+TEST_F(AssemblerX86Test, Cmpb) {
+  GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+  const char* expected = "cmpb $0, 128(%EDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h
index 2dfb65c..0bc1560 100644
--- a/compiler/utils/x86/constants_x86.h
+++ b/compiler/utils/x86/constants_x86.h
@@ -97,6 +97,8 @@
   kNotZero      = kNotEqual,
   kNegative     = kSign,
   kPositive     = kNotSign,
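+  // Unsigned below / above-or-equal test CF directly, so they double as carry-set / carry-clear.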
+  kCarrySet     = kBelow,
+  kCarryClear   = kAboveEqual,
   kUnordered    = kParityEven
 };
 
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
new file mode 100644
index 0000000..77af885
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86.h"
+
+#include "utils/assembler.h"
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "thread.h"
+
+namespace art {
+namespace x86 {
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class X86ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler* sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 4;
+
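+// Convenience macro: every "__ mnemonic(...)" below expands to a call on the wrapped assembler, asm_.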
+#define __ asm_.
+
+void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                      ManagedRegister method_reg,
+                                      ArrayRef<const ManagedRegister> spill_regs,
+                                      const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(4);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ pushl(spill);
+    gpr_count++;
+    cfi().AdjustCFAOffset(kFramePointerSize);
+    cfi().RelOffset(DWARFReg(spill), 0);
+  }
+
+  // return address then method on stack.
+  int32_t adjust = frame_size - gpr_count * kFramePointerSize -
+      kFramePointerSize /*method*/ -
+      kFramePointerSize /*return address*/;
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+  __ pushl(method_reg.AsX86().AsCpuRegister());
+  cfi().AdjustCFAOffset(kFramePointerSize);
+  DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86().IsCpuRegister()) {
+      int offset = frame_size + spill.getSpillOffset();
+      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(spill.AsX86().IsXmmRegister());
+      if (spill.getSize() == 8) {
+        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      }
+    }
+  }
+}
+
+void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                       ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  // -kFramePointerSize for ArtMethod*.
+  int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
+  __ addl(ESP, Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
+    __ popl(spill);
+    cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+    cfi().Restore(DWARFReg(spill));
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addl(ESP, Immediate(-adjust));
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(X86Assembler* assembler, size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addl(ESP, Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(&asm_, adjust);
+}
+
+void X86JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister src = msrc.AsX86();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(Address(ESP, offs), src.AsCpuRegister());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(Address(ESP, offs), src.AsRegisterPairLow());
+    __ movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(ESP, offs));
+    } else {
+      __ fstpl(Address(ESP, offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(ESP, offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(ESP, offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86ManagedRegister src = msrc.AsX86();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(ESP, dest), src.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+  __ movl(Address(ESP, dest), Immediate(imm));
+}
+
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                                    FrameOffset fr_offs,
+                                                    ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
+  __ fs()->movl(Address::Absolute(thr_offs), ESP);
+}
+
+void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                         ManagedRegister /*src*/,
+                                         FrameOffset /*in_off*/,
+                                         ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
+}
+
+void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ movl(dest.AsCpuRegister(), Address(ESP, src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ movl(dest.AsRegisterPairLow(), Address(ESP, src));
+    __ movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(ESP, src));
+    } else {
+      __ fldl(Address(ESP, src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(ESP, src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(ESP, src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
+    __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ fs()->flds(Address::Absolute(src));
+    } else {
+      __ fs()->fldl(Address::Absolute(src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
+    } else {
+      __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
+    }
+  }
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(ESP, src));
+}
+
+void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && base.AsX86().IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                      ManagedRegister base,
+                                      Offset offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister() && base.AsX86().IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+}
+
+void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
+  X86ManagedRegister dest = mdest.AsX86();
+  CHECK(dest.IsCpuRegister());
+  __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
+}
+
+void X86JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86ManagedRegister reg = mreg.AsX86();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86ManagedRegister dest = mdest.AsX86();
+  X86ManagedRegister src = msrc.AsX86();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movl(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      __ subl(ESP, Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(ESP, 0));
+        __ movss(dest.AsXmmRegister(), Address(ESP, 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(ESP, 0));
+        __ movsd(dest.AsXmmRegister(), Address(ESP, 0));
+      }
+      __ addl(ESP, Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                ThreadOffset32 thr_offs,
+                                                ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
+  Store(fr_offs, scratch, 4);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                              FrameOffset fr_offs,
+                                              ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 4);
+  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                ManagedRegister /*src_base*/,
+                                Offset /*src_offset*/,
+                                ManagedRegister /*scratch*/,
+                                size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushl(Address(ESP, src));
+  __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                FrameOffset src_base,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movl(scratch, Address(ESP, src_base));
+  __ movl(scratch, Address(scratch, src_offset));
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::Copy(ManagedRegister dest,
+                                Offset dest_offset,
+                                ManagedRegister src,
+                                Offset src_offset,
+                                ManagedRegister scratch,
+                                size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
+  __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
+}
+
+void X86JNIMacroAssembler::Copy(FrameOffset dest,
+                                Offset dest_offset,
+                                FrameOffset src,
+                                Offset src_offset,
+                                ManagedRegister mscratch,
+                                size_t size) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movl(scratch, Address(ESP, src));
+  __ pushl(Address(scratch, src_offset));
+  __ popl(Address(scratch, dest_offset));
+}
+
+void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
+void X86JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister min_reg,
+                                                  bool null_allowed) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+}
+
+void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                  FrameOffset handle_scope_offset,
+                                                  ManagedRegister mscratch,
+                                                  bool null_allowed) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+  }
+  Store(out_off, scratch, 4);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                        ManagedRegister min_reg) {
+  X86ManagedRegister out_reg = mout_reg.AsX86();
+  X86ManagedRegister in_reg = min_reg.AsX86();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
+
+void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86ManagedRegister base = mbase.AsX86();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  __ movl(scratch, Address(ESP, base));
+  __ call(Address(scratch, offset));
+}
+
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+  __ fs()->call(Address::Absolute(offset));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+}
+
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
+                                            ManagedRegister mscratch) {
+  X86ManagedRegister scratch = mscratch.AsX86();
+  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+}
+
+void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86ExceptionSlowPath* slow = new (__ GetArena()) X86ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
+
+#undef __
+
+void X86ExceptionSlowPath::Emit(Assembler *sasm) {
+  X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(sp_asm, stack_adjust_);
+  }
+  // Pass exception as argument in EAX
+  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()));
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pDeliverException)));
+  // this call should never return
+  __ int3();
+#undef __
+}
+
+}  // namespace x86
+}  // namespace art
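
A quick way to sanity-check the CFA bookkeeping in BuildFrame/RemoveFrame above is to replay the arithmetic in isolation. The following standalone sketch uses illustrative values (a 32-byte frame with two GPR spills) rather than anything taken from this patch:

#include <cassert>
#include <cstddef>

int main() {
  constexpr size_t kFramePointerSize = 4;   // 32-bit x86 stack slots.
  constexpr size_t kStackAlignment = 16;
  const size_t frame_size = 32;             // Hypothetical, 16-byte aligned.
  const size_t gpr_count = 2;               // Two callee-save GPRs pushed.
  assert(frame_size % kStackAlignment == 0);

  // The CFA offset starts at 4 (return address) and grows with each pushl.
  size_t cfa = kFramePointerSize + gpr_count * kFramePointerSize;

  // Space reserved below the spills, excluding the ArtMethod* slot (pushed
  // separately) and the return address that is already on the stack.
  const size_t adjust = frame_size - gpr_count * kFramePointerSize
                        - kFramePointerSize    // method
                        - kFramePointerSize;   // return address
  cfa += adjust;              // addl ESP, -adjust
  cfa += kFramePointerSize;   // pushl method_reg

  assert(cfa == frame_size);  // Mirrors the DCHECK_EQ at the end of BuildFrame.
  return 0;
}
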
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
new file mode 100644
index 0000000..3f07ede
--- /dev/null
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+#define ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
+
+#include <vector>
+
+#include "assembler_x86.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86 {
+
+class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> {
+ public:
+  explicit X86JNIMacroAssembler(ArenaAllocator* arena) : JNIMacroAssemblerFwd(arena) {}
+  virtual ~X86JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
+                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler);
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_
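
A minimal sketch of how a caller might drive the interface declared above. It only uses names that appear in this patch, but the include paths and the wrapper function are hypothetical, so treat it as an illustration rather than buildable code:

#include "utils/x86/jni_macro_assembler_x86.h"   // Include paths are illustrative.
#include "utils/x86/managed_register_x86.h"

namespace art {

// Spill the x86 callee-saves, store one reference into the frame,
// then tear the frame down again.
void EmitTrivialJniFrame(ArenaAllocator* arena) {
  x86::X86JNIMacroAssembler assembler(arena);

  const ManagedRegister spills[] = {
      x86::X86ManagedRegister::FromCpuRegister(x86::EBP),
      x86::X86ManagedRegister::FromCpuRegister(x86::ESI),
      x86::X86ManagedRegister::FromCpuRegister(x86::EDI),
  };
  ManagedRegisterEntrySpills no_entry_spills;  // No register arguments to home.

  // 32 bytes: return address + ArtMethod* + three spills + outgoing space.
  assembler.BuildFrame(/* frame_size */ 32u,
                       x86::X86ManagedRegister::FromCpuRegister(x86::EAX),
                       ArrayRef<const ManagedRegister>(spills),
                       no_entry_spills);
  assembler.StoreRef(FrameOffset(16), x86::X86ManagedRegister::FromCpuRegister(x86::ECX));
  assembler.RemoveFrame(/* frame_size */ 32u, ArrayRef<const ManagedRegister>(spills));
}

}  // namespace art
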
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index fc20d7e..c0c2b65 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -89,64 +89,64 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  ByteRegister AsByteRegister() const {
+  constexpr ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
     return static_cast<ByteRegister>(id_);
   }
 
-  Register AsCpuRegister() const {
+  constexpr Register AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return static_cast<Register>(id_);
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds);
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     return static_cast<RegisterPair>(id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds));
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -160,33 +160,33 @@
   // then false is returned.
   bool Overlaps(const X86ManagedRegister& other) const;
 
-  static X86ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86ManagedRegister FromXmmRegister(XmmRegister r) {
+  static constexpr X86ManagedRegister FromXmmRegister(XmmRegister r) {
     CHECK_NE(r, kNoXmmRegister);
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86ManagedRegister FromRegId(int reg_id) {
     X86ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace x86
 
-inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
+constexpr inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
   x86::X86ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
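
With these accessors and factories constexpr, a register mapping can now be formed and queried inside a constant expression. A small illustration, assuming the CHECK macros remain usable during constant evaluation (this sketch does not verify that):

#include "utils/x86/managed_register_x86.h"  // Include path is illustrative.

namespace art {

// Can be evaluated entirely at compile time, e.g. when building
// calling-convention tables.
constexpr x86::X86ManagedRegister kMethodRegister =
    x86::X86ManagedRegister::FromCpuRegister(x86::EAX);
static_assert(kMethodRegister.IsCpuRegister(), "EAX must map to a CPU register id");

}  // namespace art
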
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32eb4a3..1f73aa7 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1224,6 +1224,16 @@
 }
 
 
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
+  EmitOptionalRex32(address);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());
@@ -1379,6 +1389,25 @@
 }
 
 
+void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
+  EmitUint8(0xF6);
+  EmitOperand(Register::RAX, dst);
+  CHECK(imm.is_int8());
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
+void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
+  EmitUint8(0xF7);
+  EmitOperand(0, dst);
+  EmitImmediate(imm);
+}
+
+
 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -2629,543 +2658,6 @@
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
-}
-static dwarf::Reg DWARFReg(FloatRegister reg) {
-  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = 8;
-
-void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& spill_regs,
-                                 const ManagedRegisterEntrySpills& entry_spills) {
-  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
-  cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsCpuRegister()) {
-      pushq(spill.AsCpuRegister());
-      gpr_count++;
-      cfi_.AdjustCFAOffset(kFramePointerSize);
-      cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
-    }
-  }
-  // return address then method on stack.
-  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
-                          - (gpr_count * kFramePointerSize)
-                          - kFramePointerSize /*return address*/;
-  subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  cfi_.AdjustCFAOffset(rest_of_frame);
-
-  // spill xmms
-  int64_t offset = rest_of_frame;
-  for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset -= sizeof(double);
-      movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
-      cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
-    }
-  }
-
-  DCHECK_EQ(kX86_64PointerSize, kFramePointerSize);
-
-  movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
-
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ManagedRegisterSpill spill = entry_spills.at(i);
-    if (spill.AsX86_64().IsCpuRegister()) {
-      if (spill.getSize() == 8) {
-        movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-             spill.AsX86_64().AsCpuRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
-      }
-    } else {
-      if (spill.getSize() == 8) {
-        movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
-      }
-    }
-  }
-}
-
-void X86_64Assembler::RemoveFrame(size_t frame_size,
-                            const std::vector<ManagedRegister>& spill_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-  int gpr_count = 0;
-  // unspill xmms
-  int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsXmmRegister()) {
-      offset += sizeof(double);
-      movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
-      cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
-    } else {
-      gpr_count++;
-    }
-  }
-  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-  for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
-    if (spill.IsCpuRegister()) {
-      popq(spill.AsCpuRegister());
-      cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
-      cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
-    }
-  }
-  ret();
-  // The CFI should be restored for any code that follows the exit block.
-  cfi_.RestoreState();
-  cfi_.DefCFAOffset(frame_size);
-}
-
-void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  cfi_.AdjustCFAOffset(adjust);
-}
-
-void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(adjust));
-  cfi_.AdjustCFAOffset(-adjust);
-}
-
-void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (src.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (src.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    } else {
-      CHECK_EQ(8u, size);
-      movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
-    }
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
-    movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
-         src.AsRegisterPairHigh());
-  } else if (src.IsX87Register()) {
-    if (size == 4) {
-      fstps(Address(CpuRegister(RSP), offs));
-    } else {
-      fstpl(Address(CpuRegister(RSP), offs));
-    }
-  } else {
-    CHECK(src.IsXmmRegister());
-    if (size == 4) {
-      movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    } else {
-      movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
-    }
-  }
-}
-
-void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
-                                            ManagedRegister) {
-  movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                               ManagedRegister) {
-  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
-  gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
-}
-
-void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
-                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
-  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
-}
-
-void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    if (size == 4) {
-      CHECK_EQ(4u, size);
-      movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      CHECK_EQ(8u, size);
-      movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-    }
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
-    movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      flds(Address(CpuRegister(RSP), src));
-    } else {
-      fldl(Address(CpuRegister(RSP), src));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    } else {
-      movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
-    }
-  }
-}
-
-void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  if (dest.IsNoRegister()) {
-    CHECK_EQ(0u, size);
-  } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
-    gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
-  } else if (dest.IsX87Register()) {
-    if (size == 4) {
-      gs()->flds(Address::Absolute(src, true));
-    } else {
-      gs()->fldl(Address::Absolute(src, true));
-    }
-  } else {
-    CHECK(dest.IsXmmRegister());
-    if (size == 4) {
-      gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
-    } else {
-      gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
-    }
-  }
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-}
-
-void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                              bool unpoison_reference) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-  if (unpoison_reference) {
-    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                              Offset offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-}
-
-void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  CHECK(dest.IsCpuRegister());
-  gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
-}
-
-void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
-  X86_64ManagedRegister reg = mreg.AsX86_64();
-  CHECK(size == 1 || size == 2) << size;
-  CHECK(reg.IsCpuRegister()) << reg;
-  if (size == 1) {
-    movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
-  } else {
-    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
-  }
-}
-
-void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
-  X86_64ManagedRegister dest = mdest.AsX86_64();
-  X86_64ManagedRegister src = msrc.AsX86_64();
-  if (!dest.Equals(src)) {
-    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
-      movq(dest.AsCpuRegister(), src.AsCpuRegister());
-    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
-      // Pass via stack and pop X87 register
-      subl(CpuRegister(RSP), Immediate(16));
-      if (size == 4) {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstps(Address(CpuRegister(RSP), 0));
-        movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      } else {
-        CHECK_EQ(src.AsX87Register(), ST0);
-        fstpl(Address(CpuRegister(RSP), 0));
-        movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
-      }
-      addq(CpuRegister(RSP), Immediate(16));
-    } else {
-      // TODO: x87, SSE
-      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
-    }
-  }
-}
-
-void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
-  movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<8> thr_offs,
-                                             ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
-  Store(fr_offs, scratch, 8);
-}
-
-void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  Load(scratch, fr_offs, 8);
-  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch,
-                           size_t size) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
-  }
-}
-
-void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
-                        ManagedRegister /*scratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                        ManagedRegister scratch, size_t size) {
-  CHECK(scratch.IsNoRegister());
-  CHECK_EQ(size, 4u);
-  pushq(Address(CpuRegister(RSP), src));
-  popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  movq(scratch, Address(CpuRegister(RSP), src_base));
-  movq(scratch, Address(scratch, src_offset));
-  movq(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                        ManagedRegister src, Offset src_offset,
-                        ManagedRegister scratch, size_t size) {
-  CHECK_EQ(size, 4u);
-  CHECK(scratch.IsNoRegister());
-  pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
-  popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                        ManagedRegister mscratch, size_t size) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  CHECK_EQ(size, 4u);
-  CHECK_EQ(dest.Int32Value(), src.Int32Value());
-  movq(scratch, Address(CpuRegister(RSP), src));
-  pushq(Address(scratch, src_offset));
-  popq(Address(scratch, dest_offset));
-}
-
-void X86_64Assembler::MemoryBarrier(ManagedRegister) {
-  mfence();
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister min_reg, bool null_allowed) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
-    // Use out_reg as indicator of null.
-    in_reg = out_reg;
-    // TODO: movzwl
-    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  CHECK(in_reg.IsCpuRegister());
-  CHECK(out_reg.IsCpuRegister());
-  VerifyObject(in_reg, null_allowed);
-  if (null_allowed) {
-    Label null_arg;
-    if (!out_reg.Equals(in_reg)) {
-      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-    }
-    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-}
-
-void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                   FrameOffset handle_scope_offset,
-                                   ManagedRegister mscratch,
-                                   bool null_allowed) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  if (null_allowed) {
-    Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
-    j(kZero, &null_arg);
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    Bind(&null_arg);
-  } else {
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-  }
-  Store(out_off, scratch, 8);
-}
-
-// Given a handle scope entry, load the associated reference.
-void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                         ManagedRegister min_reg) {
-  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
-  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
-  CHECK(out_reg.IsCpuRegister());
-  CHECK(in_reg.IsCpuRegister());
-  Label null_arg;
-  if (!out_reg.Equals(in_reg)) {
-    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
-  }
-  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
-  j(kZero, &null_arg);
-  movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
-  Bind(&null_arg);
-}
-
-void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
-}
-
-void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
-  X86_64ManagedRegister base = mbase.AsX86_64();
-  CHECK(base.IsCpuRegister());
-  call(Address(base.AsCpuRegister(), offset.Int32Value()));
-  // TODO: place reference map on call
-}
-
-void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
-  call(Address(scratch, offset));
-}
-
-void X86_64Assembler::CallFromThread64(ThreadOffset<8> offset, ManagedRegister /*mscratch*/) {
-  gs()->call(Address::Absolute(offset, true));
-}
-
-void X86_64Assembler::GetCurrentThread(ManagedRegister tr) {
-  gs()->movq(tr.AsX86_64().AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
-}
-
-void X86_64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true));
-  movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
-}
-
-// Slowpath entered when Thread::Current()->_exception is non-null
-class X86_64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const size_t stack_adjust_;
-};
-
-void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0));
-  j(kNotEqual, slow->Entry());
-}
-
-void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
-  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  // Note: the return value is dead
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
-  }
-  // Pass exception as argument in RDI
-  __ gs()->movq(CpuRegister(RDI), Address::Absolute(Thread::ExceptionOffset<8>(), true));
-  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), true));
-  // this call should never return
-  __ int3();
-#undef __
-}
-
 void X86_64Assembler::AddConstantArea() {
   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   for (size_t i = 0, e = area.size(); i < e; i++) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 92c7d0a..3a4bfca 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -28,6 +28,7 @@
 #include "offsets.h"
 #include "utils/array_ref.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 namespace art {
 namespace x86_64 {
@@ -258,7 +259,7 @@
   }
 
   // If no_rip is true then the Absolute address isn't RIP relative.
-  static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
+  static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
     return Absolute(addr.Int32Value(), no_rip);
   }
 
@@ -506,6 +507,7 @@
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
@@ -526,6 +528,9 @@
   void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
+  void testb(const Address& address, const Immediate& imm);
+  void testl(const Address& address, const Immediate& imm);
+
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
   void andl(CpuRegister reg, const Address& address);
@@ -698,124 +703,6 @@
   }
   void Bind(NearLabel* label);
 
-  //
-  // Overridden common assembler high-level functionality
-  //
-
-  // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
-
-  // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
-
-  void IncreaseFrameSize(size_t adjust) OVERRIDE;
-  void DecreaseFrameSize(size_t adjust) OVERRIDE;
-
-  // Store routines
-  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
-  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
-  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
-
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
-
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) OVERRIDE;
-
-  // Load routines
-  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
-
-  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool unpoison_reference) OVERRIDE;
-
-  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
-
-  // Copying routines
-  void Move(ManagedRegister dest, ManagedRegister src, size_t size);
-
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
-      OVERRIDE;
-
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
-            size_t size) OVERRIDE;
-
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister scratch, size_t size) OVERRIDE;
-
-  void MemoryBarrier(ManagedRegister) OVERRIDE;
-
-  // Sign extension
-  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Zero extension
-  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
-
-  // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) OVERRIDE;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
-                                     ManagedRegister src);
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
-  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
-
-  // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
-
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
@@ -857,6 +744,12 @@
   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(CpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(CpuRegister reg) {
     if (kPoisonHeapReferences) {
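The new MaybePoisonHeapReference is the symmetric counterpart of MaybeUnpoisonHeapReference, so emitters no longer need to test kPoisonHeapReferences themselves before a reference store. A minimal sketch of the intended call pattern; the helper function, registers and field offset below are illustrative and not part of this change:

    // Sketch: storing a reference field with poisoning folded into the helpers.
    void EmitReferenceFieldStore(x86_64::X86_64Assembler* assembler,
                                 x86_64::CpuRegister obj,
                                 x86_64::CpuRegister value,
                                 int32_t field_offset) {
      assembler->MaybePoisonHeapReference(value);    // negl only if kPoisonHeapReferences
      assembler->movl(x86_64::Address(obj, field_offset), value);
      assembler->MaybeUnpoisonHeapReference(value);  // restore the value for the caller
    }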
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index b19e616..48a1876 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -22,7 +22,9 @@
 
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
+#include "jni_macro_assembler_x86_64.h"
 #include "utils/assembler_test.h"
+#include "utils/jni_macro_assembler_test.h"
 
 namespace art {
 
@@ -37,7 +39,7 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr size_t kRandomIterations = 1000;  // Devices might be puny, don't stress them...
 #else
 static constexpr size_t kRandomIterations = 100000;  // Hosts are pretty powerful.
@@ -1485,6 +1487,104 @@
   DriverFn(&setcc_test_fn, "setcc");
 }
 
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
+TEST_F(AssemblerX86_64Test, Repnescasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "Repnescasw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsw) {
+  GetAssembler()->repe_cmpsw();
+  const char* expected = "repe cmpsw\n";
+  DriverStr(expected, "Repecmpsw");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsl) {
+  GetAssembler()->repe_cmpsl();
+  const char* expected = "repe cmpsl\n";
+  DriverStr(expected, "Repecmpsl");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsq) {
+  GetAssembler()->repe_cmpsq();
+  const char* expected = "repe cmpsq\n";
+  DriverStr(expected, "Repecmpsq");
+}
+
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
+TEST_F(AssemblerX86_64Test, TestbAddressImmediate) {
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+                      x86_64::CpuRegister(x86_64::RBX),
+                      x86_64::TIMES_4,
+                      12),
+      x86_64::Immediate(1));
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
+      x86_64::Immediate(-128));
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
+      x86_64::Immediate(127));
+  const char* expected =
+      "testb $1, 0xc(%RDI,%RBX,4)\n"
+      "testb $-128, 0x7(%RSP)\n"
+      "testb $127, 0x82(%RBX)\n";
+
+  DriverStr(expected, "TestbAddressImmediate");
+}
+
+TEST_F(AssemblerX86_64Test, TestlAddressImmediate) {
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+                      x86_64::CpuRegister(x86_64::RBX),
+                      x86_64::TIMES_4,
+                      12),
+      x86_64::Immediate(1));
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
+      x86_64::Immediate(-100000));
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
+      x86_64::Immediate(77777777));
+  const char* expected =
+      "testl $1, 0xc(%RDI,%RBX,4)\n"
+      "testl $-100000, 0x7(%RSP)\n"
+      "testl $77777777, 0x82(%RBX)\n";
+
+  DriverStr(expected, "TestlAddressImmediate");
+}
+
+class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
+ public:
+  using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "x86_64";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn";
+  }
+
+ private:
+};
+
 static x86_64::X86_64ManagedRegister ManagedFromCpu(x86_64::Register r) {
   return x86_64::X86_64ManagedRegister::FromCpuRegister(r);
 }
@@ -1493,14 +1593,16 @@
   return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
 }
 
-std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                               x86_64::X86_64Assembler* assembler) {
+std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                               x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   // Three random entry spills.
   ManagedRegisterEntrySpills entry_spills;
@@ -1534,18 +1636,20 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, BuildFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) {
   DriverFn(&buildframe_test_fn, "BuildFrame");
 }
 
-std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                x86_64::X86_64Assembler* assembler) {
+std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                x86_64::X86_64JNIMacroAssembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
   assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
@@ -1563,12 +1667,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, RemoveFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) {
   DriverFn(&removeframe_test_fn, "RemoveFrame");
 }
 
-std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string increaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->IncreaseFrameSize(0U);
   assembler->IncreaseFrameSize(kStackAlignment);
   assembler->IncreaseFrameSize(10 * kStackAlignment);
@@ -1582,12 +1687,13 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, IncreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) {
   DriverFn(&increaseframe_test_fn, "IncreaseFrame");
 }
 
-std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
-                                  x86_64::X86_64Assembler* assembler) {
+std::string decreaseframe_test_fn(
+    JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+    x86_64::X86_64JNIMacroAssembler* assembler) {
   assembler->DecreaseFrameSize(0U);
   assembler->DecreaseFrameSize(kStackAlignment);
   assembler->DecreaseFrameSize(10 * kStackAlignment);
@@ -1601,40 +1707,8 @@
   return str.str();
 }
 
-TEST_F(AssemblerX86_64Test, DecreaseFrame) {
+TEST_F(JNIMacroAssemblerX86_64Test, DecreaseFrame) {
   DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
 }
 
-TEST_F(AssemblerX86_64Test, MovzxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
-}
-
-TEST_F(AssemblerX86_64Test, MovsxbRegs) {
-  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
-}
-
-TEST_F(AssemblerX86_64Test, Repnescasw) {
-  GetAssembler()->repne_scasw();
-  const char* expected = "repne scasw\n";
-  DriverStr(expected, "Repnescasw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsw) {
-  GetAssembler()->repe_cmpsw();
-  const char* expected = "repe cmpsw\n";
-  DriverStr(expected, "Repecmpsw");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsl) {
-  GetAssembler()->repe_cmpsl();
-  const char* expected = "repe cmpsl\n";
-  DriverStr(expected, "Repecmpsl");
-}
-
-TEST_F(AssemblerX86_64Test, Repecmpsq) {
-  GetAssembler()->repe_cmpsq();
-  const char* expected = "repe cmpsq\n";
-  DriverStr(expected, "Repecmpsq");
-}
-
 }  // namespace art
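The instruction tests above stay on AssemblerX86_64Test, while the frame-building tests move to the new JNIMacroAssemblerX86_64Test because BuildFrame, RemoveFrame and the frame-size helpers now live on X86_64JNIMacroAssembler. A new test on the JNI side would follow the same DriverFn shape; the method under test and the expected disassembly below are illustrative only:

    // Sketch: emit through the JNI macro assembler, return the expected text.
    std::string storeref_test_fn(JNIMacroAssemblerX86_64Test::Base* test ATTRIBUTE_UNUSED,
                                 x86_64::X86_64JNIMacroAssembler* assembler) {
      assembler->StoreRef(FrameOffset(16), ManagedFromCpu(x86_64::RAX));
      return "movl %eax, 16(%rsp)\n";  // expected text is a guess, verify before use
    }

    TEST_F(JNIMacroAssemblerX86_64Test, StoreRef) {
      DriverFn(&storeref_test_fn, "StoreRef");
    }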
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 0c782d4..cc508a1 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -29,15 +29,15 @@
 
 class CpuRegister {
  public:
-  explicit CpuRegister(Register r) : reg_(r) {}
-  explicit CpuRegister(int r) : reg_(Register(r)) {}
-  Register AsRegister() const {
+  explicit constexpr CpuRegister(Register r) : reg_(r) {}
+  explicit constexpr CpuRegister(int r) : reg_(Register(r)) {}
+  constexpr Register AsRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -47,15 +47,15 @@
 
 class XmmRegister {
  public:
-  explicit XmmRegister(FloatRegister r) : reg_(r) {}
-  explicit XmmRegister(int r) : reg_(FloatRegister(r)) {}
-  FloatRegister AsFloatRegister() const {
+  explicit constexpr XmmRegister(FloatRegister r) : reg_(r) {}
+  explicit constexpr XmmRegister(int r) : reg_(FloatRegister(r)) {}
+  constexpr FloatRegister AsFloatRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -106,6 +106,8 @@
   kNotZero      = kNotEqual,
   kNegative     = kSign,
   kPositive     = kNotSign,
+  kCarrySet     = kBelow,
+  kCarryClear   = kAboveEqual,
   kUnordered    = kParityEven
 };
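kCarrySet and kCarryClear are pure aliases: an unsigned below comparison on x86-64 is exactly a carry-flag test, so they reuse the kBelow and kAboveEqual encodings and introduce no new jcc opcodes. The constexpr register wrappers above also make simple encoding facts checkable at compile time; a small illustrative example, not part of this change:

    // Compile-time checks enabled by the constexpr accessors.
    static_assert(!x86_64::CpuRegister(x86_64::RAX).NeedsRex(),
                  "RAX is encodable without a REX prefix");
    static_assert(x86_64::CpuRegister(x86_64::R10).LowBits() == 2,
                  "R10 encodes as 2 plus the REX.B bit");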
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
new file mode 100644
index 0000000..3e687a7
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -0,0 +1,609 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_x86_64.h"
+
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "memory_region.h"
+#include "thread.h"
+
+namespace art {
+namespace x86_64 {
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
+constexpr size_t kFramePointerSize = 8;
+
+#define __ asm_.
+
+void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
+                                         ManagedRegister method_reg,
+                                         ArrayRef<const ManagedRegister> spill_regs,
+                                         const ManagedRegisterEntrySpills& entry_spills) {
+  DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
+  cfi().SetCurrentCFAOffset(8);  // Return address on stack.
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ pushq(spill.AsCpuRegister());
+      gpr_count++;
+      cfi().AdjustCFAOffset(kFramePointerSize);
+      cfi().RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
+    }
+  }
+  // return address then method on stack.
+  int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+                          - (gpr_count * kFramePointerSize)
+                          - kFramePointerSize /*return address*/;
+  __ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+  cfi().AdjustCFAOffset(rest_of_frame);
+
+  // spill xmms
+  int64_t offset = rest_of_frame;
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset -= sizeof(double);
+      __ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+      cfi().RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
+    }
+  }
+
+  static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize,
+                "Unexpected frame pointer size.");
+
+  __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86_64().IsCpuRegister()) {
+      if (spill.getSize() == 8) {
+        __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                spill.AsX86_64().AsCpuRegister());
+      }
+    } else {
+      if (spill.getSize() == 8) {
+        __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
+                 spill.AsX86_64().AsXmmRegister());
+      }
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+                                          ArrayRef<const ManagedRegister> spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi().RememberState();
+  int gpr_count = 0;
+  // unspill xmms
+  int64_t offset = static_cast<int64_t>(frame_size)
+      - (spill_regs.size() * kFramePointerSize)
+      - 2 * kFramePointerSize;
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsXmmRegister()) {
+      offset += sizeof(double);
+      __ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+      cfi().Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
+    } else {
+      gpr_count++;
+    }
+  }
+  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
+  __ addq(CpuRegister(RSP), Immediate(adjust));
+  cfi().AdjustCFAOffset(-adjust);
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
+    if (spill.IsCpuRegister()) {
+      __ popq(spill.AsCpuRegister());
+      cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+      cfi().Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
+    }
+  }
+  __ ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi().RestoreState();
+  cfi().DefCFAOffset(frame_size);
+}
+
+void X86_64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
+  cfi().AdjustCFAOffset(adjust);
+}
+
+static void DecreaseFrameSizeImpl(size_t adjust, X86_64Assembler* assembler) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  assembler->addq(CpuRegister(RSP), Immediate(adjust));
+  assembler->cfi().AdjustCFAOffset(-adjust);
+}
+
+void X86_64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+  DecreaseFrameSizeImpl(adjust, &asm_);
+}
+
+void X86_64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      __ movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+    }
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
+    __ movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
+            src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      __ fstps(Address(CpuRegister(RSP), offs));
+    } else {
+      __ fstpl(Address(CpuRegister(RSP), offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      __ movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    } else {
+      __ movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
+                                                    uint32_t imm,
+                                                    ManagedRegister) {
+  __ movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
+}
+
+void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                                       FrameOffset fr_offs,
+                                                       ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
+  __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+}
+
+void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
+                                            ManagedRegister /*src*/,
+                                            FrameOffset /*in_off*/,
+                                            ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
+}
+
+void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      __ movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      CHECK_EQ(8u, size);
+      __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+    }
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    __ movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
+    __ movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ flds(Address(CpuRegister(RSP), src));
+    } else {
+      __ fldl(Address(CpuRegister(RSP), src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    } else {
+      __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest,
+                                             ThreadOffset64 src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      __ gs()->flds(Address::Absolute(src, true));
+    } else {
+      __ gs()->fldl(Address::Absolute(src, true));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
+    } else {
+      __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+}
+
+void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest,
+                                      ManagedRegister mbase,
+                                      MemberOffset offs,
+                                      bool unpoison_reference) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  CHECK(dest.IsCpuRegister());
+  __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
+                                         ManagedRegister mbase,
+                                         Offset offs) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  CHECK(dest.IsCpuRegister());
+  __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
+}
+
+void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  __ gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
+}
+
+void X86_64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movsxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    __ movzxb(reg.AsCpuRegister(), reg.AsCpuRegister());
+  } else {
+    __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      __ movq(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      __ subl(CpuRegister(RSP), Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstps(Address(CpuRegister(RSP), 0));
+        __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        __ fstpl(Address(CpuRegister(RSP), 0));
+        __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0));
+      }
+      __ addq(CpuRegister(RSP), Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
+  __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                                   ThreadOffset64 thr_offs,
+                                                   ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
+  Store(fr_offs, scratch, 8);
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                                 FrameOffset fr_offs,
+                                                 ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 8);
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
+                                   ManagedRegister /*src_base*/,
+                                   Offset /*src_offset*/,
+                                   ManagedRegister /*scratch*/,
+                                   size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  __ pushq(Address(CpuRegister(RSP), src));
+  __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   FrameOffset src_base,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  __ movq(scratch, Address(CpuRegister(RSP), src_base));
+  __ movq(scratch, Address(scratch, src_offset));
+  __ movq(Address(CpuRegister(RSP), dest), scratch);
+}
+
+void X86_64JNIMacroAssembler::Copy(ManagedRegister dest,
+                                   Offset dest_offset,
+                                   ManagedRegister src,
+                                   Offset src_offset,
+                                   ManagedRegister scratch,
+                                   size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
+  __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
+                                   Offset dest_offset,
+                                   FrameOffset src,
+                                   Offset src_offset,
+                                   ManagedRegister mscratch,
+                                   size_t size) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  __ movq(scratch, Address(CpuRegister(RSP), src));
+  __ pushq(Address(scratch, src_offset));
+  __ popq(Address(scratch, dest_offset));
+}
+
+void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
+  __ mfence();
+}
+
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister min_reg,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
+    // Use out_reg as indicator of null.
+    in_reg = out_reg;
+    // TODO: movzwl
+    __ movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+}
+
+void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                                     FrameOffset handle_scope_offset,
+                                                     ManagedRegister mscratch,
+                                                     bool null_allowed) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ j(kZero, &null_arg);
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ Bind(&null_arg);
+  } else {
+    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+  }
+  Store(out_off, scratch, 8);
+}
+
+// Given a handle scope entry, load the associated reference.
+void X86_64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                           ManagedRegister min_reg) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  __ j(kZero, &null_arg);
+  __ movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  __ Bind(&null_arg);
+}
+
+void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+  __ movq(scratch, Address(CpuRegister(RSP), base));
+  __ call(Address(scratch, offset));
+}
+
+void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+  __ gs()->call(Address::Absolute(offset, true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
+  __ gs()->movq(tr.AsX86_64().AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+}
+
+void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  __ gs()->movq(scratch.AsCpuRegister(),
+                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+  __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
+}
+
+// Slowpath entered when Thread::Current()->_exception is non-null
+class X86_64ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const size_t stack_adjust_;
+};
+
+void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86_64ExceptionSlowPath* slow = new (__ GetArena()) X86_64ExceptionSlowPath(stack_adjust);
+  __ GetBuffer()->EnqueueSlowPath(slow);
+  __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true), Immediate(0));
+  __ j(kNotEqual, slow->Entry());
+}
+
+#undef __
+
+void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
+  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSizeImpl(stack_adjust_, sp_asm);
+  }
+  // Pass exception as argument in RDI
+  __ gs()->movq(CpuRegister(RDI),
+                Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true));
+  __ gs()->call(
+      Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pDeliverException), true));
+  // this call should never return
+  __ int3();
+#undef __
+}
+
+}  // namespace x86_64
+}  // namespace art
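For orientation, the frame that BuildFrame lays out can be pictured with a worked example: with frame_size = 96, two GPR spills (R10 and RSI) and no XMM spills, rest_of_frame = 96 - 2*8 - 8 = 72, so after the two pushes and the subq the stack looks roughly as follows (offsets relative to the final RSP; a sketch, not code emitted by this change):

    // [RSP + 96]  caller's outgoing-arg area; entry spills land at
    //             frame_size + getSpillOffset()
    // [RSP + 88]  return address (already on the stack at entry)
    // [RSP + 80]  saved RSI   (pushq, CFI RelOffset recorded)
    // [RSP + 72]  saved R10
    // ...         72 bytes reserved by subq $72, %rsp (locals, XMM spill slots)
    // [RSP +  0]  ArtMethod* copied from method_reg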
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
new file mode 100644
index 0000000..cc4e57c
--- /dev/null
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
+
+#include <vector>
+
+#include "assembler_x86_64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assembler,
+                                                                  PointerSize::k64> {
+ public:
+  explicit X86_64JNIMacroAssembler(ArenaAllocator* arena)
+      : JNIMacroAssemblerFwd<X86_64Assembler, PointerSize::k64>(arena) {}
+  virtual ~X86_64JNIMacroAssembler() {}
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
+      OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister src,
+                     FrameOffset in_off,
+                     ManagedRegister scratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister dest, ManagedRegister src, size_t size);
+
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+      OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister scratch,
+            size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
+
+  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold a Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**) load this into dst
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to a ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_
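Since the class only forwards to X86_64Assembler through JNIMacroAssemblerFwd, it can be driven directly, much as the new gtest does. A rough sketch, assuming the usual ArenaPool/ArenaAllocator setup used elsewhere in ART; treat the setup and the register/offset choices as illustrative:

    // Sketch: emitting a tiny stub fragment by hand.
    ArenaPool pool;
    ArenaAllocator arena(&pool);
    x86_64::X86_64JNIMacroAssembler jni_asm(&arena);
    jni_asm.IncreaseFrameSize(32);   // must be kStackAlignment (16) aligned
    jni_asm.Store(FrameOffset(16),
                  x86_64::X86_64ManagedRegister::FromCpuRegister(x86_64::RAX),
                  /* size */ 8u);
    jni_asm.DecreaseFrameSize(32);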
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index c4228c1..32af672 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -88,52 +88,52 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  CpuRegister AsCpuRegister() const {
+  constexpr CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return XmmRegister(static_cast<FloatRegister>(id_ - kNumberOfCpuRegIds));
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  CpuRegister AsRegisterPairLow() const {
+  constexpr CpuRegister AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  CpuRegister AsRegisterPairHigh() const {
+  constexpr CpuRegister AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -147,32 +147,32 @@
   // then false is returned.
   bool Overlaps(const X86_64ManagedRegister& other) const;
 
-  static X86_64ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86_64ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
+  static constexpr X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86_64ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86_64ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -188,9 +188,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86_64ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86_64ManagedRegister FromRegId(int reg_id) {
     X86_64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -201,7 +201,7 @@
 
 }  // namespace x86_64
 
-inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
+constexpr inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
   x86_64::X86_64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index dfc379f..37acef6 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -62,7 +62,6 @@
   libnativebridge \
   libnativeloader \
   libsigchain_dummy \
-  libvixl \
   liblog \
   libz \
   libbacktrace \
@@ -83,14 +82,14 @@
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libvixl-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libvixld-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cfb905b..cfcfe1c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -516,6 +516,7 @@
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
       compiled_methods_filename_(nullptr),
+      passes_to_run_filename_(nullptr),
       app_image_(false),
       boot_image_(false),
       multi_image_(false),
@@ -684,12 +685,6 @@
       Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
 
-    if (IsBootImage()) {
-      // We need the boot image to always be debuggable.
-      // TODO: Remove this once we better deal with full frame deoptimization.
-      compiler_options_->debuggable_ = true;
-    }
-
     if (oat_filenames_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
@@ -900,6 +895,16 @@
       }
     }
     compiler_options_->force_determinism_ = force_determinism_;
+
+    if (passes_to_run_filename_ != nullptr) {
+      passes_to_run_.reset(ReadCommentedInputFromFile<std::vector<std::string>>(
+          passes_to_run_filename_,
+          nullptr));         // No post-processing.
+      if (passes_to_run_.get() == nullptr) {
+        Usage("Failed to read list of passes to run.");
+      }
+    }
+    compiler_options_->passes_to_run_ = passes_to_run_.get();
   }
 
   static bool SupportsDeterministicCompilation() {
@@ -1099,6 +1104,8 @@
         compiled_methods_filename_ = option.substr(strlen("--compiled-methods=")).data();
       } else if (option.starts_with("--compiled-methods-zip=")) {
         compiled_methods_zip_filename_ = option.substr(strlen("--compiled-methods-zip=")).data();
+      } else if (option.starts_with("--run-passes=")) {
+        passes_to_run_filename_ = option.substr(strlen("--run-passes=")).data();
       } else if (option.starts_with("--base=")) {
         ParseBase(option);
       } else if (option.starts_with("--boot-image=")) {
@@ -1569,7 +1576,7 @@
                                      IsAppImage(),
                                      image_classes_.release(),
                                      compiled_classes_.release(),
-                                     /* compiled_methods */ nullptr,
+                                     compiled_methods_.release(),
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
@@ -2038,11 +2045,13 @@
       if (location == OatFile::kSpecialSharedLibrary) {
         break;
       }
+      static constexpr bool kVerifyChecksum = true;
       std::string error_msg;
-      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+      if (!DexFile::Open(
+          location.c_str(), location.c_str(), kVerifyChecksum, &error_msg, opened_dex_files)) {
         // If we fail to open the dex file because it's been stripped, try to open the dex file
         // from its corresponding oat file.
-        OatFileAssistant oat_file_assistant(location.c_str(), isa, false, false);
+        OatFileAssistant oat_file_assistant(location.c_str(), isa, false);
         std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
         if (oat_file == nullptr) {
           LOG(WARNING) << "Failed to open dex file and associated oat file for '" << location
@@ -2110,13 +2119,15 @@
     if (compiled_methods_filename_ != nullptr) {
       std::string error_msg;
       if (compiled_methods_zip_filename_ != nullptr) {
-        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
-                                                          compiled_methods_filename_,
-                                                          nullptr,            // No post-processing.
-                                                          &error_msg));
+        compiled_methods_.reset(ReadCommentedInputFromZip<std::unordered_set<std::string>>(
+            compiled_methods_zip_filename_,
+            compiled_methods_filename_,
+            nullptr,            // No post-processing.
+            &error_msg));
       } else {
-        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
-                                                           nullptr));         // No post-processing.
+        compiled_methods_.reset(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+            compiled_methods_filename_,
+            nullptr));          // No post-processing.
       }
       if (compiled_methods_.get() == nullptr) {
         LOG(ERROR) << "Failed to create list of compiled methods from '"
@@ -2149,7 +2160,8 @@
     TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
     if (zip_fd_ != -1) {
       DCHECK_EQ(oat_writers_.size(), 1u);
-      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+      if (!oat_writers_[0]->AddZippedDexFilesSource(File(zip_fd_, /* check_usage */ false),
+                                                    zip_location_.c_str())) {
         return false;
       }
     } else if (oat_writers_.size() > 1u) {
@@ -2349,7 +2361,8 @@
   static std::unordered_set<std::string>* ReadImageClassesFromFile(
       const char* image_classes_filename) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromFile(image_classes_filename, &process);
+    return ReadCommentedInputFromFile<std::unordered_set<std::string>>(image_classes_filename,
+                                                                       &process);
   }
 
   // Reads the class names (java.lang.Object) and returns a set of descriptors (Ljava/lang/Object;)
@@ -2358,27 +2371,32 @@
         const char* image_classes_filename,
         std::string* error_msg) {
     std::function<std::string(const char*)> process = DotToDescriptor;
-    return ReadCommentedInputFromZip(zip_filename, image_classes_filename, &process, error_msg);
+    return ReadCommentedInputFromZip<std::unordered_set<std::string>>(zip_filename,
+                                                                      image_classes_filename,
+                                                                      &process,
+                                                                      error_msg);
   }
 
   // Read lines from the given file, dropping comments and empty lines. Post-process each line with
   // the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromFile(
+  template <typename T>
+  static T* ReadCommentedInputFromFile(
       const char* input_filename, std::function<std::string(const char*)>* process) {
     std::unique_ptr<std::ifstream> input_file(new std::ifstream(input_filename, std::ifstream::in));
     if (input_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open input file " << input_filename;
       return nullptr;
     }
-    std::unique_ptr<std::unordered_set<std::string>> result(
-        ReadCommentedInputStream(*input_file, process));
+    std::unique_ptr<T> result(
+        ReadCommentedInputStream<T>(*input_file, process));
     input_file->close();
     return result.release();
   }
 
   // Read lines from the given file from the given zip file, dropping comments and empty lines.
   // Post-process each line with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputFromZip(
+  template <typename T>
+  static T* ReadCommentedInputFromZip(
       const char* zip_filename,
       const char* input_filename,
       std::function<std::string(const char*)>* process,
@@ -2404,16 +2422,16 @@
     const std::string input_string(reinterpret_cast<char*>(input_file->Begin()),
                                    input_file->Size());
     std::istringstream input_stream(input_string);
-    return ReadCommentedInputStream(input_stream, process);
+    return ReadCommentedInputStream<T>(input_stream, process);
   }
 
   // Read lines from the given stream, dropping comments and empty lines. Post-process each line
   // with the given function.
-  static std::unordered_set<std::string>* ReadCommentedInputStream(
+  template <typename T>
+  static T* ReadCommentedInputStream(
       std::istream& in_stream,
       std::function<std::string(const char*)>* process) {
-    std::unique_ptr<std::unordered_set<std::string>> image_classes(
-        new std::unordered_set<std::string>);
+    std::unique_ptr<T> output(new T());
     while (in_stream.good()) {
       std::string dot;
       std::getline(in_stream, dot);
@@ -2422,12 +2440,12 @@
       }
       if (process != nullptr) {
         std::string descriptor((*process)(dot.c_str()));
-        image_classes->insert(descriptor);
+        output->insert(output->end(), descriptor);
       } else {
-        image_classes->insert(dot);
+        output->insert(output->end(), dot);
       }
     }
-    return image_classes.release();
+    return output.release();
   }
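+  // Illustrative usage (not part of this change): because insertion goes through
+  // insert(output->end(), ...), the same helper now fills both set- and
+  // sequence-like containers, e.g. (with a hypothetical filename and process function):
+  //   ReadCommentedInputFromFile<std::unordered_set<std::string>>(filename, &process);
+  //   ReadCommentedInputFromFile<std::vector<std::string>>(filename, nullptr);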
 
   void LogCompletionTime() {
@@ -2504,9 +2522,11 @@
   const char* compiled_classes_filename_;
   const char* compiled_methods_zip_filename_;
   const char* compiled_methods_filename_;
+  const char* passes_to_run_filename_;
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
+  std::unique_ptr<std::vector<std::string>> passes_to_run_;
   bool app_image_;
   bool boot_image_;
   bool multi_image_;
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 6188883..58dd047 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include <regex>
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
@@ -154,33 +155,44 @@
     CHECK(android_root != nullptr);
     argv.push_back("--android-root=" + std::string(android_root));
 
-    std::string command_line(Join(argv, ' '));
+    int link[2];
 
-    // We need to fix up the '&' being used for "do not check classpath."
-    size_t ampersand = command_line.find(" &");
-    CHECK_NE(ampersand, std::string::npos);
-    command_line = command_line.replace(ampersand, 2, " \\&");
+    if (pipe(link) == -1) {
+      return false;
+    }
 
-    command_line += " 2>&1";
+    pid_t pid = fork();
+    if (pid == -1) {
+      return false;
+    }
 
-    // We need dex2oat to actually log things.
-    setenv("ANDROID_LOG_TAGS", "*:d", 1);
-
-    FILE* pipe = popen(command_line.c_str(), "r");
-
-    setenv("ANDROID_LOG_TAGS", "*:e", 1);
-
-    if (pipe == nullptr) {
-      success_ = false;
-    } else {
-      char buffer[128];
-
-      while (fgets(buffer, 128, pipe) != nullptr) {
-        output_ += buffer;
+    if (pid == 0) {
+      // We need dex2oat to actually log things.
+      setenv("ANDROID_LOG_TAGS", "*:d", 1);
+      dup2(link[1], STDERR_FILENO);
+      close(link[0]);
+      close(link[1]);
+      std::vector<const char*> c_args;
+      for (const std::string& str : argv) {
+        c_args.push_back(str.c_str());
       }
+      c_args.push_back(nullptr);
+      execv(c_args[0], const_cast<char* const*>(c_args.data()));
+      exit(1);
+    } else {
+      close(link[1]);
+      char buffer[128];
+      memset(buffer, 0, 128);
+      ssize_t bytes_read = 0;
 
-      int result = pclose(pipe);
-      success_ = result == 0;
+      while (TEMP_FAILURE_RETRY(bytes_read = read(link[0], buffer, 128)) > 0) {
+        output_ += std::string(buffer, bytes_read);
+      }
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        success_ = (status == 0);
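+        // Note (illustrative, not part of this change): an equivalent, more explicit
+        // check would use the <sys/wait.h> macros:
+        //   success_ = WIFEXITED(status) && WEXITSTATUS(status) == 0;
+        // status == 0 already implies a normal exit with code 0.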
+      }
     }
     return success_;
   }
@@ -196,7 +208,7 @@
     std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
     std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
 
-    Copy(GetDexSrc1(), dex_location);
+    Copy(GetTestDexFileName(), dex_location);
 
     std::vector<std::string> copy(extra_args);
 
@@ -215,7 +227,11 @@
     CheckResult(expect_use);
   }
 
-  void CheckResult(bool expect_use) {
+  virtual std::string GetTestDexFileName() {
+    return GetDexSrc1();
+  }
+
+  virtual void CheckResult(bool expect_use) {
     if (kIsTargetBuild) {
       CheckTargetResult(expect_use);
     } else {
@@ -223,13 +239,13 @@
     }
   }
 
-  void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+  virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
-  void CheckHostResult(bool expect_use) {
+  virtual void CheckHostResult(bool expect_use) {
     if (!kIsTargetBuild) {
       if (expect_use) {
         EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
@@ -242,7 +258,7 @@
   }
 
   // Check whether the dex2oat run was really successful.
-  void CheckValidity() {
+  virtual void CheckValidity() {
     if (kIsTargetBuild) {
       CheckTargetValidity();
     } else {
@@ -250,14 +266,14 @@
     }
   }
 
-  void CheckTargetValidity() {
+  virtual void CheckTargetValidity() {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
   // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
-  void CheckHostValidity() {
+  virtual void CheckHostValidity() {
     EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
   }
 };
@@ -286,6 +302,127 @@
           { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
 }
 
+class Dex2oatSwapUseTest : public Dex2oatSwapTest {
+ protected:
+  void CheckHostResult(bool expect_use) OVERRIDE {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  std::string GetTestDexFileName() OVERRIDE {
+    // Use Statics as it has a handful of functions.
+    return CommonRuntimeTest::GetTestDexFileName("Statics");
+  }
+
+  void GrabResult1() {
+    if (!kIsTargetBuild) {
+      native_alloc_1_ = ParseNativeAlloc();
+      swap_1_ = ParseSwap(false /* expected */);
+    } else {
+      native_alloc_1_ = std::numeric_limits<size_t>::max();
+      swap_1_ = 0;
+    }
+  }
+
+  void GrabResult2() {
+    if (!kIsTargetBuild) {
+      native_alloc_2_ = ParseNativeAlloc();
+      swap_2_ = ParseSwap(true /* expected */);
+    } else {
+      native_alloc_2_ = 0;
+      swap_2_ = std::numeric_limits<size_t>::max();
+    }
+  }
+
+ private:
+  size_t ParseNativeAlloc() {
+    std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch native_alloc_match;
+    bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex);
+    if (!found) {
+      EXPECT_TRUE(found);
+      return 0;
+    }
+    if (native_alloc_match.size() != 2U) {
+      EXPECT_EQ(native_alloc_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(native_alloc_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+  size_t ParseSwap(bool expected) {
+    std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch swap_match;
+    bool found = std::regex_search(output_, swap_match, swap_regex);
+    if (found != expected) {
+      EXPECT_EQ(expected, found);
+      return 0;
+    }
+
+    if (!found) {
+      return 0;
+    }
+
+    if (swap_match.size() != 2U) {
+      EXPECT_EQ(swap_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(swap_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
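+
+  // Illustrative line the regexes above are written against (shape inferred from the
+  // patterns, not quoted from dex2oat output):
+  //   dex2oat took 77.000ms ... native alloc=2MB (2097152B) ... swap=1MB (1048576B)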
+
+ protected:
+  size_t native_alloc_1_;
+  size_t native_alloc_2_;
+
+  size_t swap_1_;
+  size_t swap_2_;
+};
+
+TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  // The `native_alloc_2_ < native_alloc_1_` assertion below may not
+  // hold true on some x86 systems when read barriers are enabled;
+  // disable this test while we investigate (b/29259363).
+  TEST_DISABLED_FOR_READ_BARRIER_ON_X86();
+
+  RunTest(false /* use_fd */,
+          false /* expect_use */);
+  GrabResult1();
+  std::string output_1 = output_;
+
+  output_ = "";
+
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  GrabResult2();
+  std::string output_2 = output_;
+
+  if (native_alloc_2_ >= native_alloc_1_ || swap_1_ >= swap_2_) {
+    EXPECT_LT(native_alloc_2_, native_alloc_1_);
+    EXPECT_LT(swap_1_, swap_2_);
+
+    LOG(ERROR) << output_1;
+    LOG(ERROR) << output_2;
+  }
+}
+
 class Dex2oatVeryLargeTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 1a2f2c2..2042934 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -17,8 +17,8 @@
  *
  * This is a re-implementation of the original dexdump utility that was
  * based on Dalvik functions in libdex into a new dexdump that is now
- * based on Art functions in libart instead. The output is identical to
- * the original for correct DEX files. Error messages may differ, however.
+ * based on Art functions in libart instead. The output is very similar to
+ * the original for correct DEX files. Error messages may differ, however.
  * Also, ODEX files are no longer supported.
  *
  * The dexdump tool is intended to mimic objdump.  When possible, use
@@ -65,6 +65,8 @@
 typedef uint16_t u2;
 typedef uint32_t u4;
 typedef uint64_t u8;
+typedef int8_t   s1;
+typedef int16_t  s2;
 typedef int32_t  s4;
 typedef int64_t  s8;
 
@@ -116,7 +118,7 @@
  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
  * form can't be converted back to a descriptor.
  */
-static char* descriptorToDot(const char* str) {
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
   int targetLen = strlen(str);
   int offset = 0;
 
@@ -143,8 +145,7 @@
   }
 
   // Copy class name over.
-  char* newStr = reinterpret_cast<char*>(
-      malloc(targetLen + arrayDepth * 2 + 1));
+  std::unique_ptr<char[]> newStr(new char[targetLen + arrayDepth * 2 + 1]);
   int i = 0;
   for (; i < targetLen; i++) {
     const char ch = str[offset + i];
@@ -163,12 +164,10 @@
 
 /*
  * Converts the class name portion of a type descriptor to human-readable
- * "dotted" form.
- *
- * Returns a newly-allocated string.
+ * "dotted" form. For example, "Ljava/lang/String;" becomes "String".
  */
-static char* descriptorClassToDot(const char* str) {
-  // Reduce to just the class name, trimming trailing ';'.
+static std::unique_ptr<char[]> descriptorClassToDot(const char* str) {
+  // Reduce to just the class name prefix.
   const char* lastSlash = strrchr(str, '/');
   if (lastSlash == nullptr) {
     lastSlash = str + 1;  // start past 'L'
@@ -176,17 +175,25 @@
     lastSlash++;          // start past '/'
   }
 
-  char* newStr = strdup(lastSlash);
-  newStr[strlen(lastSlash) - 1] = '\0';
-  for (char* cp = newStr; *cp != '\0'; cp++) {
-    if (*cp == '$') {
-      *cp = '.';
-    }
+  // Copy class name over, trimming trailing ';'.
+  const int targetLen = strlen(lastSlash);
+  std::unique_ptr<char[]> newStr(new char[targetLen]);
+  for (int i = 0; i < targetLen - 1; i++) {
+    const char ch = lastSlash[i];
+    newStr[i] = ch == '$' ? '.' : ch;
   }  // for
+  newStr[targetLen - 1] = '\0';
   return newStr;
 }
 
 /*
+ * Returns a string representing the boolean value.
+ */
+static const char* strBool(bool val) {
+  return val ? "true" : "false";
+}
+
+/*
  * Returns a quoted string representing the boolean value.
  */
 static const char* quotedBool(bool val) {
@@ -346,10 +353,197 @@
 }
 
 /*
+ * Dumps a string value with some escape characters.
+ */
+static void dumpEscapedString(const char* p) {
+  fputs("\"", gOutFile);
+  for (; *p; p++) {
+    switch (*p) {
+      case '\\':
+        fputs("\\\\", gOutFile);
+        break;
+      case '\"':
+        fputs("\\\"", gOutFile);
+        break;
+      case '\t':
+        fputs("\\t", gOutFile);
+        break;
+      case '\n':
+        fputs("\\n", gOutFile);
+        break;
+      case '\r':
+        fputs("\\r", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }  // switch
+  }  // for
+  fputs("\"", gOutFile);
+}
+
+/*
+ * Dumps a string as an XML attribute value.
+ */
+static void dumpXmlAttribute(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '&':
+        fputs("&amp;", gOutFile);
+        break;
+      case '<':
+        fputs("&lt;", gOutFile);
+        break;
+      case '>':
+        fputs("&gt;", gOutFile);
+        break;
+      case '"':
+        fputs("&quot;", gOutFile);
+        break;
+      case '\t':
+        fputs("&#x9;", gOutFile);
+        break;
+      case '\n':
+        fputs("&#xA;", gOutFile);
+        break;
+      case '\r':
+        fputs("&#xD;", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }  // switch
+  }  // for
+}
+
+/*
+ * Reads a variable width value, possibly sign extended at the last defined byte.
+ */
+static u8 readVarWidth(const u1** data, u1 arg, bool sign_extend) {
+  u8 value = 0;
+  for (u4 i = 0; i <= arg; i++) {
+    value |= static_cast<u8>(*(*data)++) << (i * 8);
+  }
+  if (sign_extend) {
+    int shift = (7 - arg) * 8;
+    return (static_cast<s8>(value) << shift) >> shift;
+  }
+  return value;
+}
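+// Worked example (illustrative): with arg = 1 the loop consumes two little-endian
+// bytes, so { 0xE8, 0x03 } yields 0x03E8 (1000); with sign_extend, shift = 48 and
+// { 0xE0, 0xFF } becomes 0xFFFFFFFFFFFFFFE0, i.e. -32 as a signed value.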
+
+/*
+ * Dumps encoded value.
+ */
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data);  // forward
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data, u1 type, u1 arg) {
+  switch (type) {
+    case DexFile::kDexAnnotationByte:
+      fprintf(gOutFile, "%" PRId8, static_cast<s1>(readVarWidth(data, arg, false)));
+      break;
+    case DexFile::kDexAnnotationShort:
+      fprintf(gOutFile, "%" PRId16, static_cast<s2>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationChar:
+      fprintf(gOutFile, "%" PRIu16, static_cast<u2>(readVarWidth(data, arg, false)));
+      break;
+    case DexFile::kDexAnnotationInt:
+      fprintf(gOutFile, "%" PRId32, static_cast<s4>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationLong:
+      fprintf(gOutFile, "%" PRId64, static_cast<s8>(readVarWidth(data, arg, true)));
+      break;
+    case DexFile::kDexAnnotationFloat: {
+      // Fill on right.
+      union {
+        float f;
+        u4 data;
+      } conv;
+      conv.data = static_cast<u4>(readVarWidth(data, arg, false)) << (3 - arg) * 8;
+      fprintf(gOutFile, "%g", conv.f);
+      break;
+    }
+    case DexFile::kDexAnnotationDouble: {
+      // Fill on right.
+      union {
+        double d;
+        u8 data;
+      } conv;
+      conv.data = readVarWidth(data, arg, false) << (7 - arg) * 8;
+      fprintf(gOutFile, "%g", conv.d);
+      break;
+    }
+    case DexFile::kDexAnnotationString: {
+      const u4 idx = static_cast<u4>(readVarWidth(data, arg, false));
+      if (gOptions.outputFormat == OUTPUT_PLAIN) {
+        dumpEscapedString(pDexFile->StringDataByIdx(idx));
+      } else {
+        dumpXmlAttribute(pDexFile->StringDataByIdx(idx));
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationType: {
+      const u4 str_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      fputs(pDexFile->StringByTypeIdx(str_idx), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum: {
+      const u4 field_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      const DexFile::FieldId& pFieldId = pDexFile->GetFieldId(field_idx);
+      fputs(pDexFile->StringDataByIdx(pFieldId.name_idx_), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationMethod: {
+      const u4 method_idx = static_cast<u4>(readVarWidth(data, arg, false));
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      fputs(pDexFile->StringDataByIdx(pMethodId.name_idx_), gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationArray: {
+      fputc('{', gOutFile);
+      // Decode and display all elements.
+      const u4 size = DecodeUnsignedLeb128(data);
+      for (u4 i = 0; i < size; i++) {
+        fputc(' ', gOutFile);
+        dumpEncodedValue(pDexFile, data);
+      }
+      fputs(" }", gOutFile);
+      break;
+    }
+    case DexFile::kDexAnnotationAnnotation: {
+      const u4 type_idx = DecodeUnsignedLeb128(data);
+      fputs(pDexFile->StringByTypeIdx(type_idx), gOutFile);
+      // Decode and display all name=value pairs.
+      const u4 size = DecodeUnsignedLeb128(data);
+      for (u4 i = 0; i < size; i++) {
+        const u4 name_idx = DecodeUnsignedLeb128(data);
+        fputc(' ', gOutFile);
+        fputs(pDexFile->StringDataByIdx(name_idx), gOutFile);
+        fputc('=', gOutFile);
+        dumpEncodedValue(pDexFile, data);
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationNull:
+      fputs("null", gOutFile);
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      fputs(strBool(arg), gOutFile);
+      break;
+    default:
+      fputs("????", gOutFile);
+      break;
+  }  // switch
+}
+
+/*
+ * Dumps an encoded value, reading its type/arg prefix byte first.
+ */
+static void dumpEncodedValue(const DexFile* pDexFile, const u1** data) {
+  const u1 enc = *(*data)++;
+  dumpEncodedValue(pDexFile, data, enc & 0x1f, enc >> 5);
+}
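+// Worked example (illustrative): a header byte whose low five bits select
+// kDexAnnotationInt and whose high three bits are 0 is followed by a single
+// payload byte, so { 0x2A } prints as 42 via readVarWidth above.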
+
+/*
  * Dumps the file header.
- *
- * Note that some of the : are misaligned on purpose to preserve
- * the exact output of the original Dalvik dexdump.
  */
 static void dumpFileHeader(const DexFile* pDexFile) {
   const DexFile::Header& pHeader = pDexFile->GetHeader();
@@ -373,8 +567,8 @@
   fprintf(gOutFile, "type_ids_size       : %d\n", pHeader.type_ids_size_);
   fprintf(gOutFile, "type_ids_off        : %d (0x%06x)\n",
           pHeader.type_ids_off_, pHeader.type_ids_off_);
-  fprintf(gOutFile, "proto_ids_size       : %d\n", pHeader.proto_ids_size_);
-  fprintf(gOutFile, "proto_ids_off        : %d (0x%06x)\n",
+  fprintf(gOutFile, "proto_ids_size      : %d\n", pHeader.proto_ids_size_);
+  fprintf(gOutFile, "proto_ids_off       : %d (0x%06x)\n",
           pHeader.proto_ids_off_, pHeader.proto_ids_off_);
   fprintf(gOutFile, "field_ids_size      : %d\n", pHeader.field_ids_size_);
   fprintf(gOutFile, "field_ids_off       : %d (0x%06x)\n",
@@ -426,6 +620,99 @@
   fprintf(gOutFile, "\n");
 }
 
+/**
+ * Dumps an annotation set item.
+ */
+static void dumpAnnotationSetItem(const DexFile* pDexFile,
+                                  const DexFile::AnnotationSetItem* set_item) {
+  if (set_item == nullptr || set_item->size_ == 0) {
+    fputs("  empty-annotation-set\n", gOutFile);
+    return;
+  }
+  for (u4 i = 0; i < set_item->size_; i++) {
+    const DexFile::AnnotationItem* annotation = pDexFile->GetAnnotationItem(set_item, i);
+    if (annotation == nullptr) {
+      continue;
+    }
+    fputs("  ", gOutFile);
+    switch (annotation->visibility_) {
+      case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   gOutFile); break;
+      case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", gOutFile); break;
+      case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  gOutFile); break;
+      default:                             fputs("VISIBILITY_UNKNOWN ", gOutFile); break;
+    }  // switch
+    // Decode raw bytes in annotation.
+    const u1* rData = annotation->annotation_;
+    dumpEncodedValue(pDexFile, &rData, DexFile::kDexAnnotationAnnotation, 0);
+    fputc('\n', gOutFile);
+  }
+}
+
+/*
+ * Dumps class annotations.
+ */
+static void dumpClassAnnotations(const DexFile* pDexFile, int idx) {
+  const DexFile::ClassDef& pClassDef = pDexFile->GetClassDef(idx);
+  const DexFile::AnnotationsDirectoryItem* dir = pDexFile->GetAnnotationsDirectory(pClassDef);
+  if (dir == nullptr) {
+    return;  // none
+  }
+
+  fprintf(gOutFile, "Class #%d annotations:\n", idx);
+
+  const DexFile::AnnotationSetItem* class_set_item = pDexFile->GetClassAnnotationSet(dir);
+  const DexFile::FieldAnnotationsItem* fields = pDexFile->GetFieldAnnotations(dir);
+  const DexFile::MethodAnnotationsItem* methods = pDexFile->GetMethodAnnotations(dir);
+  const DexFile::ParameterAnnotationsItem* pars = pDexFile->GetParameterAnnotations(dir);
+
+  // Annotations on the class itself.
+  if (class_set_item != nullptr) {
+    fprintf(gOutFile, "Annotations on class\n");
+    dumpAnnotationSetItem(pDexFile, class_set_item);
+  }
+
+  // Annotations on fields.
+  if (fields != nullptr) {
+    for (u4 i = 0; i < dir->fields_size_; i++) {
+      const u4 field_idx = fields[i].field_idx_;
+      const DexFile::FieldId& pFieldId = pDexFile->GetFieldId(field_idx);
+      const char* field_name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
+      fprintf(gOutFile, "Annotations on field #%u '%s'\n", field_idx, field_name);
+      dumpAnnotationSetItem(pDexFile, pDexFile->GetFieldAnnotationSetItem(fields[i]));
+    }
+  }
+
+  // Annotations on methods.
+  if (methods != nullptr) {
+    for (u4 i = 0; i < dir->methods_size_; i++) {
+      const u4 method_idx = methods[i].method_idx_;
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      const char* method_name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
+      fprintf(gOutFile, "Annotations on method #%u '%s'\n", method_idx, method_name);
+      dumpAnnotationSetItem(pDexFile, pDexFile->GetMethodAnnotationSetItem(methods[i]));
+    }
+  }
+
+  // Annotations on method parameters.
+  if (pars != nullptr) {
+    for (u4 i = 0; i < dir->parameters_size_; i++) {
+      const u4 method_idx = pars[i].method_idx_;
+      const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(method_idx);
+      const char* method_name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
+      fprintf(gOutFile, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
+      const DexFile::AnnotationSetRefList*
+          list = pDexFile->GetParameterAnnotationSetRefList(&pars[i]);
+      if (list != nullptr) {
+        for (u4 j = 0; j < list->size_; j++) {
+          fprintf(gOutFile, "#%u\n", j);
+          dumpAnnotationSetItem(pDexFile, pDexFile->GetSetRefItemItem(&list->list_[j]));
+        }
+      }
+    }
+  }
+
+  fputc('\n', gOutFile);
+}
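+
+// Illustrative plain-format output shape (derived from the fprintf calls above;
+// indices and names are made up):
+//   Class #12 annotations:
+//   Annotations on method #34 'toString'
+//     VISIBILITY_SYSTEM Ldalvik/annotation/Signature; value={ ... }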
+
 /*
  * Dumps an interface that a class declares to implement.
  */
@@ -434,9 +721,8 @@
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     fprintf(gOutFile, "    #%d              : '%s'\n", i, interfaceName);
   } else {
-    char* dotted = descriptorToDot(interfaceName);
-    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dotted);
-    free(dotted);
+    std::unique_ptr<char[]> dot(descriptorToDot(interfaceName));
+    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dot.get());
   }
 }
 
@@ -488,15 +774,13 @@
 
 /*
  * Helper for dumpInstruction(), which builds the string
- * representation for the index in the given instruction. This will
- * first try to use the given buffer, but if the result won't fit,
- * then this will allocate a new buffer to hold the result. A pointer
- * to the buffer which holds the full result is always returned, and
- * this can be compared with the one passed in, to see if the result
- * needs to be free()d.
+ * representation for the index in the given instruction.
+ * Returns a pointer to a buffer of sufficient size.
  */
-static char* indexString(const DexFile* pDexFile,
-                         const Instruction* pDecInsn, char* buf, size_t bufSize) {
+static std::unique_ptr<char[]> indexString(const DexFile* pDexFile,
+                                           const Instruction* pDecInsn,
+                                           size_t bufSize) {
+  std::unique_ptr<char[]> buf(new char[bufSize]);
   // Determine index and width of the string.
   u4 index = 0;
   u4 width = 4;
@@ -532,27 +816,27 @@
     case Instruction::kIndexUnknown:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<unknown-index>");
+      outSize = snprintf(buf.get(), bufSize, "<unknown-index>");
       break;
     case Instruction::kIndexNone:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<no-index>");
+      outSize = snprintf(buf.get(), bufSize, "<no-index>");
       break;
     case Instruction::kIndexTypeRef:
       if (index < pDexFile->GetHeader().type_ids_size_) {
         const char* tp = pDexFile->StringByTypeIdx(index);
-        outSize = snprintf(buf, bufSize, "%s // type@%0*x", tp, width, index);
+        outSize = snprintf(buf.get(), bufSize, "%s // type@%0*x", tp, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<type?> // type@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<type?> // type@%0*x", width, index);
       }
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
         const char* st = pDexFile->StringDataByIdx(index);
-        outSize = snprintf(buf, bufSize, "\"%s\" // string@%0*x", st, width, index);
+        outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<string?> // string@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
       }
       break;
     case Instruction::kIndexMethodRef:
@@ -561,10 +845,10 @@
         const char* name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
         const Signature signature = pDexFile->GetMethodSignature(pMethodId);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // method@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // method@%0*x",
                            backDescriptor, name, signature.ToString().c_str(), width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<method?> // method@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<method?> // method@%0*x", width, index);
       }
       break;
     case Instruction::kIndexFieldRef:
@@ -573,38 +857,33 @@
         const char* name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
         const char* typeDescriptor = pDexFile->StringByTypeIdx(pFieldId.type_idx_);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pFieldId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // field@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // field@%0*x",
                            backDescriptor, name, typeDescriptor, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<field?> // field@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<field?> // field@%0*x", width, index);
       }
       break;
     case Instruction::kIndexVtableOffset:
-      outSize = snprintf(buf, bufSize, "[%0*x] // vtable #%0*x",
+      outSize = snprintf(buf.get(), bufSize, "[%0*x] // vtable #%0*x",
                          width, index, width, index);
       break;
     case Instruction::kIndexFieldOffset:
-      outSize = snprintf(buf, bufSize, "[obj+%0*x]", width, index);
+      outSize = snprintf(buf.get(), bufSize, "[obj+%0*x]", width, index);
       break;
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
     default:
-      outSize = snprintf(buf, bufSize, "<?>");
+      outSize = snprintf(buf.get(), bufSize, "<?>");
       break;
   }  // switch
 
   // Determine success of string construction.
   if (outSize >= bufSize) {
-    // The buffer wasn't big enough; allocate and retry. Note:
-    // snprintf() doesn't count the '\0' as part of its returned
-    // size, so we add explicit space for it here.
-    outSize++;
-    buf = reinterpret_cast<char*>(malloc(outSize));
-    if (buf == nullptr) {
-      return nullptr;
-    }
-    return indexString(pDexFile, pDecInsn, buf, outSize);
+    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
+    // doesn't count the '\0' as part of its returned size, so we add explicit
+    // space for it here.
+    return indexString(pDexFile, pDecInsn, outSize + 1);
   }
   return buf;
 }
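+
+// Illustrative results (shapes follow the snprintf formats above; names are made up):
+//   kIndexStringRef -> "Hello" // string@0003
+//   kIndexMethodRef -> Lfoo/Bar;.baz:()V // method@0012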
@@ -652,11 +931,9 @@
   }
 
   // Set up additional argument.
-  char indexBufChars[200];
-  char *indexBuf = indexBufChars;
+  std::unique_ptr<char[]> indexBuf;
   if (Instruction::IndexTypeOf(pDecInsn->Opcode()) != Instruction::kIndexNone) {
-    indexBuf = indexString(pDexFile, pDecInsn,
-                           indexBufChars, sizeof(indexBufChars));
+    indexBuf = indexString(pDexFile, pDecInsn, 200);
   }
 
   // Dump the instruction.
@@ -677,27 +954,25 @@
       fprintf(gOutFile, " v%d", pDecInsn->VRegA());
       break;
     case Instruction::k10t:        // op +AA
-    case Instruction::k20t:        // op +AAAA
-      {
-        const s4 targ = (s4) pDecInsn->VRegA();
-        fprintf(gOutFile, " %04x // %c%04x",
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k20t: {      // op +AAAA
+      const s4 targ = (s4) pDecInsn->VRegA();
+      fprintf(gOutFile, " %04x // %c%04x",
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k22x:        // op vAA, vBBBB
       fprintf(gOutFile, " v%d, v%d", pDecInsn->VRegA(), pDecInsn->VRegB());
       break;
-    case Instruction::k21t:        // op vAA, +BBBB
-      {
-        const s4 targ = (s4) pDecInsn->VRegB();
-        fprintf(gOutFile, " v%d, %04x // %c%04x", pDecInsn->VRegA(),
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k21t: {     // op vAA, +BBBB
+      const s4 targ = (s4) pDecInsn->VRegB();
+      fprintf(gOutFile, " v%d, %04x // %c%04x", pDecInsn->VRegA(),
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k21s:        // op vAA, #+BBBB
       fprintf(gOutFile, " v%d, #int %d // #%x",
               pDecInsn->VRegA(), (s4) pDecInsn->VRegB(), (u2)pDecInsn->VRegB());
@@ -716,7 +991,7 @@
       break;
     case Instruction::k21c:        // op vAA, thing@BBBB
     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
-      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf);
+      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf.get());
       break;
     case Instruction::k23x:        // op vAA, vBB, vCC
       fprintf(gOutFile, " v%d, v%d, v%d",
@@ -727,16 +1002,15 @@
               pDecInsn->VRegA(), pDecInsn->VRegB(),
               (s4) pDecInsn->VRegC(), (u1) pDecInsn->VRegC());
       break;
-    case Instruction::k22t:        // op vA, vB, +CCCC
-      {
-        const s4 targ = (s4) pDecInsn->VRegC();
-        fprintf(gOutFile, " v%d, v%d, %04x // %c%04x",
-                pDecInsn->VRegA(), pDecInsn->VRegB(),
-                insnIdx + targ,
-                (targ < 0) ? '-' : '+',
-                (targ < 0) ? -targ : targ);
-      }
+    case Instruction::k22t: {      // op vA, vB, +CCCC
+      const s4 targ = (s4) pDecInsn->VRegC();
+      fprintf(gOutFile, " v%d, v%d, %04x // %c%04x",
+              pDecInsn->VRegA(), pDecInsn->VRegB(),
+              insnIdx + targ,
+              (targ < 0) ? '-' : '+',
+              (targ < 0) ? -targ : targ);
       break;
+    }
     case Instruction::k22s:        // op vA, vB, #+CCCC
       fprintf(gOutFile, " v%d, v%d, #int %d // #%04x",
               pDecInsn->VRegA(), pDecInsn->VRegB(),
@@ -746,23 +1020,22 @@
     // NOT SUPPORTED:
     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
       fprintf(gOutFile, " v%d, v%d, %s",
-              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf);
+              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf.get());
       break;
     case Instruction::k30t:
       fprintf(gOutFile, " #%08x", pDecInsn->VRegA());
       break;
-    case Instruction::k31i:        // op vAA, #+BBBBBBBB
-      {
-        // This is often, but not always, a float.
-        union {
-          float f;
-          u4 i;
-        } conv;
-        conv.i = pDecInsn->VRegB();
-        fprintf(gOutFile, " v%d, #float %f // #%08x",
-                pDecInsn->VRegA(), conv.f, pDecInsn->VRegB());
-      }
+    case Instruction::k31i: {     // op vAA, #+BBBBBBBB
+      // This is often, but not always, a float.
+      union {
+        float f;
+        u4 i;
+      } conv;
+      conv.i = pDecInsn->VRegB();
+      fprintf(gOutFile, " v%d, #float %g // #%08x",
+              pDecInsn->VRegA(), conv.f, pDecInsn->VRegB());
       break;
+    }
     case Instruction::k31t:       // op vAA, offset +BBBBBBBB
       fprintf(gOutFile, " v%d, %08x // +%08x",
               pDecInsn->VRegA(), insnIdx + pDecInsn->VRegB(), pDecInsn->VRegB());
@@ -770,39 +1043,23 @@
     case Instruction::k32x:        // op vAAAA, vBBBB
       fprintf(gOutFile, " v%d, v%d", pDecInsn->VRegA(), pDecInsn->VRegB());
       break;
-    case Instruction::k35c:        // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k35c: {      // op {vC, vD, vE, vF, vG}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k35ms:       // [opt] invoke-virtual+super
     // case Instruction::k35mi:       // [opt] inline invoke
-      {
-        u4 arg[Instruction::kMaxVarArgRegs];
-        pDecInsn->GetVarArgs(arg);
-        fputs(" {", gOutFile);
-        for (int i = 0, n = pDecInsn->VRegA(); i < n; i++) {
-          if (i == 0) {
-            fprintf(gOutFile, "v%d", arg[i]);
-          } else {
-            fprintf(gOutFile, ", v%d", arg[i]);
-          }
-        }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
-      }
+      u4 arg[Instruction::kMaxVarArgRegs];
+      pDecInsn->GetVarArgs(arg);
+      fputs(" {", gOutFile);
+      for (int i = 0, n = pDecInsn->VRegA(); i < n; i++) {
+        if (i == 0) {
+          fprintf(gOutFile, "v%d", arg[i]);
+        } else {
+          fprintf(gOutFile, ", v%d", arg[i]);
+        }
+      }  // for
+      fprintf(gOutFile, "}, %s", indexBuf.get());
       break;
-    case Instruction::k25x:        // op vC, {vD, vE, vF, vG} (B: count)
-      {
-        u4 arg[Instruction::kMaxVarArgRegs25x];
-        pDecInsn->GetAllArgs25x(arg);
-        fprintf(gOutFile, " v%d, {", arg[0]);
-        for (int i = 0, n = pDecInsn->VRegB(); i < n; i++) {
-          if (i == 0) {
-            fprintf(gOutFile, "v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
-          } else {
-            fprintf(gOutFile, ", v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
-          }
-        }  // for
-        fputc('}', gOutFile);
-      }
-      break;
+    }
     case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
@@ -818,21 +1075,20 @@
             fprintf(gOutFile, ", v%d", pDecInsn->VRegC() + i);
           }
         }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
+        fprintf(gOutFile, "}, %s", indexBuf.get());
       }
       break;
-    case Instruction::k51l:        // op vAA, #+BBBBBBBBBBBBBBBB
-      {
-        // This is often, but not always, a double.
-        union {
-          double d;
-          u8 j;
-        } conv;
-        conv.j = pDecInsn->WideVRegB();
-        fprintf(gOutFile, " v%d, #double %f // #%016" PRIx64,
-                pDecInsn->VRegA(), conv.d, pDecInsn->WideVRegB());
-      }
+    case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
+      // This is often, but not always, a double.
+      union {
+        double d;
+        u8 j;
+      } conv;
+      conv.j = pDecInsn->WideVRegB();
+      fprintf(gOutFile, " v%d, #double %g // #%016" PRIx64,
+              pDecInsn->VRegA(), conv.d, pDecInsn->WideVRegB());
       break;
+    }
     // NOT SUPPORTED:
     // case Instruction::k00x:        // unknown op or breakpoint
     //    break;
@@ -842,10 +1098,6 @@
   }  // switch
 
   fputc('\n', gOutFile);
-
-  if (indexBuf != indexBufChars) {
-    free(indexBuf);
-  }
 }
 
 /*
@@ -859,11 +1111,9 @@
   const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
 
   // Generate header.
-  char* tmp = descriptorToDot(backDescriptor);
-  fprintf(gOutFile, "%06x:                                        "
-          "|[%06x] %s.%s:%s\n",
-          codeOffset, codeOffset, tmp, name, signature.ToString().c_str());
-  free(tmp);
+  std::unique_ptr<char[]> dot(descriptorToDot(backDescriptor));
+  fprintf(gOutFile, "%06x:                                        |[%06x] %s.%s:%s\n",
+          codeOffset, codeOffset, dot.get(), name, signature.ToString().c_str());
 
   // Iterate over all instructions.
   const u2* insns = pCode->insns_;
@@ -942,12 +1192,10 @@
 
     // Method name and prototype.
     if (constructor) {
-      char* tmp = descriptorClassToDot(backDescriptor);
-      fprintf(gOutFile, "<constructor name=\"%s\"\n", tmp);
-      free(tmp);
-      tmp = descriptorToDot(backDescriptor);
-      fprintf(gOutFile, " type=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorClassToDot(backDescriptor));
+      fprintf(gOutFile, "<constructor name=\"%s\"\n", dot.get());
+      dot = descriptorToDot(backDescriptor);
+      fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     } else {
       fprintf(gOutFile, "<method name=\"%s\"\n", name);
       const char* returnType = strrchr(typeDescriptor, ')');
@@ -955,9 +1203,8 @@
         fprintf(stderr, "bad method type descriptor '%s'\n", typeDescriptor);
         goto bail;
       }
-      char* tmp = descriptorToDot(returnType+1);
-      fprintf(gOutFile, " return=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorToDot(returnType + 1));
+      fprintf(gOutFile, " return=\"%s\"\n", dot.get());
       fprintf(gOutFile, " abstract=%s\n", quotedBool((flags & kAccAbstract) != 0));
       fprintf(gOutFile, " native=%s\n", quotedBool((flags & kAccNative) != 0));
       fprintf(gOutFile, " synchronized=%s\n", quotedBool(
@@ -990,18 +1237,17 @@
         } while (*cp++ != ';');
       } else {
         // Primitive char, copy it.
-        if (strchr("ZBCSIFJD", *base) == NULL) {
+        if (strchr("ZBCSIFJD", *base) == nullptr) {
           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
-          goto bail;
+          break;  // while
         }
         *cp++ = *base++;
       }
       // Null terminate and display.
       *cp++ = '\0';
-      char* tmp = descriptorToDot(tmpBuf);
+      std::unique_ptr<char[]> dot(descriptorToDot(tmpBuf));
       fprintf(gOutFile, "<parameter name=\"arg%d\" type=\"%s\">\n"
-                        "</parameter>\n", argNum++, tmp);
-      free(tmp);
+                        "</parameter>\n", argNum++, dot.get());
     }  // while
     free(tmpBuf);
     if (constructor) {
@@ -1017,126 +1263,9 @@
 }
 
 /*
- * Dumps a string value with some escape characters.
- */
-static void dumpEscapedString(const char* p) {
-  for (; *p; p++) {
-    switch (*p) {
-      case '\\':
-        fputs("\\\\", gOutFile);
-        break;
-      case '\"':
-        fputs("\\\"", gOutFile);
-        break;
-      case '\t':
-        fputs("\\t", gOutFile);
-        break;
-      case '\n':
-        fputs("\\n", gOutFile);
-        break;
-      case '\r':
-        fputs("\\r", gOutFile);
-        break;
-      default:
-        putc(*p, gOutFile);
-    }
-  }
-}
-
-/*
- * Dumps an XML attribute value between double-quotes.
- */
-static void dumpXmlAttribute(const char* p) {
-  for (; *p; p++) {
-    switch (*p) {
-      case '&':
-        fputs("&amp;", gOutFile);
-        break;
-      case '<':
-        fputs("&lt;", gOutFile);
-        break;
-      case '"':
-        fputs("&quot;", gOutFile);
-        break;
-      case '\t':
-        fputs("&#x9;", gOutFile);
-        break;
-      case '\n':
-        fputs("&#xA;", gOutFile);
-        break;
-      case '\r':
-        fputs("&#xD;", gOutFile);
-        break;
-      default:
-        putc(*p, gOutFile);
-    }
-  }
-}
-
-/*
- * Dumps a value of static (class) field.
- */
-static void dumpSFieldValue(const DexFile* pDexFile,
-                            EncodedStaticFieldValueIterator::ValueType valueType,
-                            const jvalue* pValue) {
-  switch (valueType) {
-    case EncodedStaticFieldValueIterator::kByte:
-      fprintf(gOutFile, "%" PRIu8, pValue->b);
-      break;
-    case EncodedStaticFieldValueIterator::kShort:
-      fprintf(gOutFile, "%" PRId16, pValue->s);
-      break;
-    case EncodedStaticFieldValueIterator::kChar:
-      fprintf(gOutFile, "%" PRIu16, pValue->c);
-      break;
-    case EncodedStaticFieldValueIterator::kInt:
-      fprintf(gOutFile, "%" PRId32, pValue->i);
-      break;
-    case EncodedStaticFieldValueIterator::kLong:
-      fprintf(gOutFile, "%" PRId64, pValue->j);
-      break;
-    case EncodedStaticFieldValueIterator::kFloat:
-      fprintf(gOutFile, "%f", pValue->f);
-      break;
-    case EncodedStaticFieldValueIterator::kDouble:
-      fprintf(gOutFile, "%f", pValue->d);
-      break;
-    case EncodedStaticFieldValueIterator::kString: {
-      const char* str =
-          pDexFile->GetStringData(pDexFile->GetStringId(pValue->i));
-      if (gOptions.outputFormat == OUTPUT_PLAIN) {
-        fputs("\"", gOutFile);
-        dumpEscapedString(str);
-        fputs("\"", gOutFile);
-      } else {
-        dumpXmlAttribute(str);
-      }
-      break;
-    }
-    case EncodedStaticFieldValueIterator::kNull:
-      fputs("null", gOutFile);
-      break;
-    case EncodedStaticFieldValueIterator::kBoolean:
-      fputs(pValue->z ? "true" : "false", gOutFile);
-      break;
-
-    case EncodedStaticFieldValueIterator::kAnnotation:
-    case EncodedStaticFieldValueIterator::kArray:
-    case EncodedStaticFieldValueIterator::kEnum:
-    case EncodedStaticFieldValueIterator::kField:
-    case EncodedStaticFieldValueIterator::kMethod:
-    case EncodedStaticFieldValueIterator::kType:
-    default:
-      fprintf(gOutFile, "Unexpected static field type: %d", valueType);
-  }
-}
-
-/*
  * Dumps a static (class) field.
  */
-static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i,
-                       EncodedStaticFieldValueIterator::ValueType valueType,
-                       const jvalue* pValue) {
+static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i, const u1** data) {
   // Bail for anything private if export only requested.
   if (gOptions.exportsOnly && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
@@ -1153,16 +1282,15 @@
     fprintf(gOutFile, "      name          : '%s'\n", name);
     fprintf(gOutFile, "      type          : '%s'\n", typeDescriptor);
     fprintf(gOutFile, "      access        : 0x%04x (%s)\n", flags, accessStr);
-    if (pValue != nullptr) {
+    if (data != nullptr) {
       fputs("      value         : ", gOutFile);
-      dumpSFieldValue(pDexFile, valueType, pValue);
+      dumpEncodedValue(pDexFile, data);
       fputs("\n", gOutFile);
     }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
-    char *tmp = descriptorToDot(typeDescriptor);
-    fprintf(gOutFile, " type=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorToDot(typeDescriptor));
+    fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     fprintf(gOutFile, " transient=%s\n", quotedBool((flags & kAccTransient) != 0));
     fprintf(gOutFile, " volatile=%s\n", quotedBool((flags & kAccVolatile) != 0));
     // The "value=" is not knowable w/o parsing annotations.
@@ -1170,9 +1298,9 @@
     fprintf(gOutFile, " final=%s\n", quotedBool((flags & kAccFinal) != 0));
     // The "deprecated=" is not knowable w/o parsing annotations.
     fprintf(gOutFile, " visibility=%s\n", quotedVisibility(flags));
-    if (pValue != nullptr) {
+    if (data != nullptr) {
       fputs(" value=\"", gOutFile);
-      dumpSFieldValue(pDexFile, valueType, pValue);
+      dumpEncodedValue(pDexFile, data);
       fputs("\"\n", gOutFile);
     }
     fputs(">\n</field>\n", gOutFile);
@@ -1185,8 +1313,7 @@
  * Dumps an instance field.
  */
 static void dumpIField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
-  dumpSField(pDexFile, idx, flags, i,
-             EncodedStaticFieldValueIterator::kByte, nullptr);
+  dumpSField(pDexFile, idx, flags, i, nullptr);
 }
 
 /*
@@ -1196,7 +1323,7 @@
  */
 
 static void dumpCfg(const DexFile* dex_file,
-                    uint32_t dex_method_idx,
+                    u4 dex_method_idx,
                     const DexFile::CodeItem* code_item) {
   if (code_item != nullptr) {
     std::ostringstream oss;
@@ -1207,7 +1334,7 @@
 
 static void dumpCfg(const DexFile* dex_file, int idx) {
   const DexFile::ClassDef& class_def = dex_file->GetClassDef(idx);
-  const uint8_t* class_data = dex_file->GetClassData(class_def);
+  const u1* class_data = dex_file->GetClassData(class_def);
   if (class_data == nullptr) {  // empty class such as a marker interface?
     return;
   }
@@ -1248,7 +1375,15 @@
     return;
   }
 
-  if (gOptions.cfg) {
+  if (gOptions.showSectionHeaders) {
+    dumpClassDef(pDexFile, idx);
+  }
+
+  if (gOptions.showAnnotations) {
+    dumpClassAnnotations(pDexFile, idx);
+  }
+
+  if (gOptions.showCfg) {
     dumpCfg(pDexFile, idx);
     return;
   }
@@ -1310,13 +1445,11 @@
     }
     fprintf(gOutFile, "  Interfaces        -\n");
   } else {
-    char* tmp = descriptorClassToDot(classDescriptor);
-    fprintf(gOutFile, "<class name=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorClassToDot(classDescriptor));
+    fprintf(gOutFile, "<class name=\"%s\"\n", dot.get());
     if (superclassDescriptor != nullptr) {
-      tmp = descriptorToDot(superclassDescriptor);
-      fprintf(gOutFile, " extends=\"%s\"\n", tmp);
-      free(tmp);
+      dot = descriptorToDot(superclassDescriptor);
+      fprintf(gOutFile, " extends=\"%s\"\n", dot.get());
     }
     fprintf(gOutFile, " interface=%s\n",
             quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
@@ -1347,33 +1480,35 @@
     }
   } else {
     ClassDataItemIterator pClassData(*pDexFile, pEncodedData);
+
+    // Prepare data for static fields.
+    const u1* sData = pDexFile->GetEncodedStaticFieldValuesArray(pClassDef);
+    const u4 sSize = sData != nullptr ? DecodeUnsignedLeb128(&sData) : 0;
+
+    // Static fields.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Static fields     -\n");
     }
-    EncodedStaticFieldValueIterator staticFieldValues(*pDexFile, pClassDef);
-    for (int i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
-      EncodedStaticFieldValueIterator::ValueType valueType =
-          EncodedStaticFieldValueIterator::kByte;
-      const jvalue* pValue = nullptr;
-      if (staticFieldValues.HasNext()) {
-        valueType = staticFieldValues.GetValueType();
-        pValue = &staticFieldValues.GetJavaValue();
-      }
-      dumpSField(pDexFile, pClassData.GetMemberIndex(),
-                           pClassData.GetRawMemberAccessFlags(), i,
-                 valueType, pValue);
-      if (staticFieldValues.HasNext()) {
-        staticFieldValues.Next();
-      }
+    for (u4 i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
+      dumpSField(pDexFile,
+                 pClassData.GetMemberIndex(),
+                 pClassData.GetRawMemberAccessFlags(),
+                 i,
+                 i < sSize ? &sData : nullptr);
     }  // for
-    DCHECK(!staticFieldValues.HasNext());
+
+    // Instance fields.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Instance fields   -\n");
     }
-    for (int i = 0; pClassData.HasNextInstanceField(); i++, pClassData.Next()) {
-      dumpIField(pDexFile, pClassData.GetMemberIndex(),
-                          pClassData.GetRawMemberAccessFlags(), i);
+    for (u4 i = 0; pClassData.HasNextInstanceField(); i++, pClassData.Next()) {
+      dumpIField(pDexFile,
+                 pClassData.GetMemberIndex(),
+                 pClassData.GetRawMemberAccessFlags(),
+                 i);
     }  // for
+
+    // Direct methods.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Direct methods    -\n");
     }
@@ -1383,6 +1518,8 @@
                            pClassData.GetMethodCodeItem(),
                            pClassData.GetMethodCodeItemOffset(), i);
     }  // for
+
+    // Virtual methods.
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Virtual methods   -\n");
     }
@@ -1434,9 +1571,6 @@
   char* package = nullptr;
   const u4 classDefsSize = pDexFile->GetHeader().class_defs_size_;
   for (u4 i = 0; i < classDefsSize; i++) {
-    if (gOptions.showSectionHeaders) {
-      dumpClassDef(pDexFile, i);
-    }
     dumpClass(pDexFile, i, &package);
   }  // for
 
@@ -1461,17 +1595,11 @@
   }
 
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
-  // all of which are Zip archives with "classes.dex" inside. The compressed
-  // data needs to be extracted to a temp file, the location of which varies.
-  //
-  // TODO(ajcbik): fix following issues
-  //
-  // (1) gOptions.tempFileName is not accounted for
-  // (2) gOptions.ignoreBadChecksum is not accounted for
-  //
+  // all of which are Zip archives with "classes.dex" inside.
+  const bool kVerifyChecksum = !gOptions.ignoreBadChecksum;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     // Display returned error message to user. Note that this error behavior
     // differs from the error messages shown by the original Dalvik dexdump.
     fputs(error_msg.c_str(), stderr);
diff --git a/dexdump/dexdump.h b/dexdump/dexdump.h
index 50280a9..6939f90 100644
--- a/dexdump/dexdump.h
+++ b/dexdump/dexdump.h
@@ -42,13 +42,13 @@
   bool disassemble;
   bool exportsOnly;
   bool ignoreBadChecksum;
+  bool showAnnotations;
+  bool showCfg;
   bool showFileHeaders;
   bool showSectionHeaders;
   bool verbose;
-  bool cfg;
   OutputFormat outputFormat;
   const char* outputFileName;
-  const char* tempFileName;
 };
 
 /* Prototypes. */
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index dd1002c..f716ba8 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -17,8 +17,8 @@
  *
  * This is a re-implementation of the original dexdump utility that was
  * based on Dalvik functions in libdex into a new dexdump that is now
- * based on Art functions in libart instead. The output is identical to
- * the original for correct DEX files. Error messages may differ, however.
+ * based on Art functions in libart instead. The output is very similar to
+ * the original for correct DEX files. Error messages may differ, however.
  * Also, ODEX files are no longer supported.
  */
 
@@ -40,19 +40,18 @@
  */
 static void usage(void) {
   fprintf(stderr, "Copyright (C) 2007 The Android Open Source Project\n\n");
-  fprintf(stderr, "%s: [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile]"
-                  " [-t tempfile] dexfile...\n", gProgName);
-  fprintf(stderr, "\n");
+  fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile]"
+                  " dexfile...\n\n", gProgName);
+  fprintf(stderr, " -a : display annotations\n");
   fprintf(stderr, " -c : verify checksum and exit\n");
   fprintf(stderr, " -d : disassemble code sections\n");
   fprintf(stderr, " -e : display exported items only\n");
   fprintf(stderr, " -f : display summary information from file header\n");
-  fprintf(stderr, " -g : dump CFG for dex\n");
+  fprintf(stderr, " -g : display CFG for dex\n");
   fprintf(stderr, " -h : display file header details\n");
   fprintf(stderr, " -i : ignore checksum failures\n");
   fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n");
   fprintf(stderr, " -o : output file name (defaults to stdout)\n");
-  fprintf(stderr, " -t : temp file name (defaults to /sdcard/dex-temp-*)\n");
 }
 
 /*
@@ -70,11 +69,14 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "cdefghil:t:o:");
+    const int ic = getopt(argc, argv, "acdefghil:o:");
     if (ic < 0) {
       break;  // done
     }
     switch (ic) {
+      case 'a':  // display annotations
+        gOptions.showAnnotations = true;
+        break;
       case 'c':  // verify the checksum then exit
         gOptions.checksumOnly = true;
         break;
@@ -84,13 +86,13 @@
       case 'e':  // exported items only
         gOptions.exportsOnly = true;
         break;
-      case 'f':  // dump outer file header
+      case 'f':  // display outer file header
         gOptions.showFileHeaders = true;
         break;
-      case 'g':  // dump cfg
-        gOptions.cfg = true;
+      case 'g':  // display cfg
+        gOptions.showCfg = true;
         break;
-      case 'h':  // dump section headers, i.e. all meta-data
+      case 'h':  // display section headers, i.e. all meta-data
         gOptions.showSectionHeaders = true;
         break;
       case 'i':  // continue even if checksum is bad
@@ -106,9 +108,6 @@
           wantUsage = true;
         }
         break;
-      case 't':  // temp file, used when opening compressed Jar
-        gOptions.tempFileName = optarg;
-        break;
       case 'o':  // output file
         gOptions.outputFileName = optarg;
         break;
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index d20c169..a1bde0e 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -60,18 +60,17 @@
  * final ";" (if any) have been removed and all occurrences of '/'
  * have been changed to '.'.
  */
-static char* descriptorToDot(const char* str) {
-  size_t at = strlen(str);
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
+  size_t len = strlen(str);
   if (str[0] == 'L') {
-    at -= 2;  // Two fewer chars to copy.
-    str++;
+    len -= 2;  // Two fewer chars to copy (trims L and ;).
+    str++;     // Start past 'L'.
   }
-  char* newStr = reinterpret_cast<char*>(malloc(at + 1));
-  newStr[at] = '\0';
-  while (at > 0) {
-    at--;
-    newStr[at] = (str[at] == '/') ? '.' : str[at];
+  std::unique_ptr<char[]> newStr(new char[len + 1]);
+  for (size_t i = 0; i < len; i++) {
+    newStr[i] = (str[i] == '/') ? '.' : str[i];
   }
+  newStr[len] = '\0';
   return newStr;
 }
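+
+// Illustrative: "Ljava/lang/String;" becomes "java.lang.String", while a primitive
+// array descriptor such as "[I" is copied unchanged (this variant does not expand
+// array types).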
 
@@ -103,14 +102,13 @@
   const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(idx);
   const char* methodName = pDexFile->StringDataByIdx(pMethodId.name_idx_);
   const char* classDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-  char* className = descriptorToDot(classDescriptor);
+  std::unique_ptr<char[]> className(descriptorToDot(classDescriptor));
   const u4 insnsOff = codeOffset + 0x10;
 
   // Don't list methods that do not match a particular query.
   if (gOptions.methodToFind != nullptr &&
-      (strcmp(gOptions.classToFind, className) != 0 ||
+      (strcmp(gOptions.classToFind, className.get()) != 0 ||
        strcmp(gOptions.methodToFind, methodName) != 0)) {
-    free(className);
     return;
   }
 
@@ -130,10 +128,9 @@
   // Dump actual method information.
   fprintf(gOutFile, "0x%08x %d %s %s %s %s %d\n",
           insnsOff, pCode->insns_size_in_code_units_ * 2,
-          className, methodName, typeDesc, fileName, firstLine);
+          className.get(), methodName, typeDesc, fileName, firstLine);
 
   free(typeDesc);
-  free(className);
 }
 
 /*
@@ -180,9 +177,10 @@
 static int processFile(const char* fileName) {
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     fputs(error_msg.c_str(), stderr);
     fputc('\n', stderr);
     return -1;
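
The dexlist change above swaps a malloc'd C string for std::unique_ptr<char[]>, which removes the explicit free() on every early return. A standalone sketch mirroring that helper, with a small usage example (main() is illustrative, not part of dexlist):

    #include <cstring>
    #include <cstdio>
    #include <memory>

    static std::unique_ptr<char[]> descriptorToDot(const char* str) {
      size_t len = strlen(str);
      if (str[0] == 'L') {
        len -= 2;  // Trim the leading 'L' and trailing ';'.
        str++;
      }
      std::unique_ptr<char[]> out(new char[len + 1]);
      for (size_t i = 0; i < len; i++) {
        out[i] = (str[i] == '/') ? '.' : str[i];
      }
      out[len] = '\0';
      return out;  // ownership travels with the return value
    }

    int main() {
      auto name = descriptorToDot("Ljava/lang/String;");
      printf("%s\n", name.get());  // prints: java.lang.String
      return 0;
    }
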
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index bf563c7..db327fc 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -59,17 +59,18 @@
   LOCAL_SRC_FILES := $$(LIBART_DISASSEMBLER_SRC_FILES)
 
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
@@ -89,9 +90,9 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For disassembler_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixl
+    LOCAL_SHARED_LIBRARIES += libvixld-arm64
   else
-     LOCAL_SHARED_LIBRARIES += libvixl
+    LOCAL_SHARED_LIBRARIES += libvixl-arm64
   endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc
index e604c1f..bcd0d16 100644
--- a/disassembler/disassembler.cc
+++ b/disassembler/disassembler.cc
@@ -32,10 +32,8 @@
     return new arm::DisassemblerArm(options);
   } else if (instruction_set == kArm64) {
     return new arm64::DisassemblerArm64(options);
-  } else if (instruction_set == kMips) {
-    return new mips::DisassemblerMips(options, false);
-  } else if (instruction_set == kMips64) {
-    return new mips::DisassemblerMips(options, true);
+  } else if (instruction_set == kMips || instruction_set == kMips64) {
+    return new mips::DisassemblerMips(options);
   } else if (instruction_set == kX86) {
     return new x86::DisassemblerX86(options, false);
   } else if (instruction_set == kX86_64) {
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index b080315..86793cc 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -28,8 +28,9 @@
 
 class DisassemblerOptions {
  public:
-  // Should the disassembler print absolute or relative addresses.
-  const bool absolute_addresses_;
+  using ThreadOffsetNameFunction = void (*)(std::ostream& os, uint32_t offset);
+
+  ThreadOffsetNameFunction thread_offset_name_function_;
 
   // Base address for calculating relative code offsets when absolute_addresses_ is false.
   const uint8_t* const base_address_;
@@ -37,6 +38,9 @@
   // End address (exclusive);
   const uint8_t* const end_address_;
 
+  // Should the disassembler print absolute or relative addresses.
+  const bool absolute_addresses_;
+
   // If set, the disassembler is allowed to look at load targets in literal
   // pools.
   const bool can_read_literals_;
@@ -44,10 +48,12 @@
   DisassemblerOptions(bool absolute_addresses,
                       const uint8_t* base_address,
                       const uint8_t* end_address,
-                      bool can_read_literals)
-      : absolute_addresses_(absolute_addresses),
+                      bool can_read_literals,
+                      ThreadOffsetNameFunction fn)
+      : thread_offset_name_function_(fn),
         base_address_(base_address),
         end_address_(end_address),
+        absolute_addresses_(absolute_addresses),
         can_read_literals_(can_read_literals) {}
 
  private:
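
The DisassemblerOptions change above injects a thread-offset naming callback so the disassemblers no longer include "thread.h". A standalone sketch of that injection pattern (the struct, the naming function, and the offset mapping are illustrative, not the ART declarations):

    #include <cstdint>
    #include <iostream>
    #include <sstream>

    using ThreadOffsetNameFunction = void (*)(std::ostream& os, uint32_t offset);

    struct DisasmOptions {
      ThreadOffsetNameFunction thread_offset_name_function_;
    };

    // Embedder-provided naming function (assumed mapping, for illustration only).
    static void DumpOffsetName(std::ostream& os, uint32_t offset) {
      os << "thread_offset_0x" << std::hex << offset;
    }

    // Stands in for the call sites that previously used Thread::DumpThreadOffset<N>.
    static void AnnotateLoad(const DisasmOptions& opts, uint32_t offset) {
      std::ostringstream args;
      args << "  ; ";
      opts.thread_offset_name_function_(args, offset);
      std::cout << args.str() << "\n";
    }

    int main() {
      DisasmOptions opts{&DumpOffsetName};
      AnnotateLoad(opts, 0x1a8);
      return 0;
    }
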
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 286faf2..a47b6ad 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -25,7 +25,6 @@
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace arm {
@@ -329,7 +328,7 @@
           }
           if (rn.r == 9) {
             args << "  ; ";
-            Thread::DumpThreadOffset<4>(args, offset);
+            GetDisassemblerOptions()->thread_offset_name_function_(args, offset);
           }
         }
       }
@@ -782,23 +781,13 @@
         args << Rm;
 
         // Shift operand.
-        bool noShift = (imm5 == 0 && shift_type != 0x3);
+        bool noShift = (imm5 == 0 && shift_type == 0x0);
         if (!noShift) {
           args << ", ";
-          switch (shift_type) {
-            case 0x0: args << "lsl"; break;
-            case 0x1: args << "lsr"; break;
-            case 0x2: args << "asr"; break;
-            case 0x3:
-              if (imm5 == 0) {
-                args << "rrx";
-              } else {
-                args << "ror #" << imm5;
-              }
-              break;
-          }
-          if (shift_type != 0x3 /* rrx */) {
-            args << StringPrintf(" #%d", (0 != imm5 || 0 == shift_type) ? imm5 : 32);
+          if (shift_type == 0x3u && imm5 == 0u) {
+            args << "rrx";
+          } else {
+            args << kThumb2ShiftOperations[shift_type] << " #" << ((0 != imm5) ? imm5 : 32);
           }
         }
 
@@ -951,17 +940,11 @@
                 opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
               } else if (op5 == 4) {
-                opcode << "vcmp" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", " << m;
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
               } else if (op5 == 5) {
-                opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32");
+                opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32");
                 args << d << ", #0.0";
-                if (op != 0) {
-                  args << " (quiet nan)";
-                }
                 if ((instr & 0x2f) != 0) {
                   args << " (UNPREDICTABLE)";
                 }
@@ -1417,7 +1400,7 @@
             args << Rt << ", [" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]";
             if (Rn.r == TR && is_load) {
               args << "  ; ";
-              Thread::DumpThreadOffset<4>(args, imm12);
+              GetDisassemblerOptions()->thread_offset_name_function_(args, imm12);
             } else if (Rn.r == PC) {
               T2LitType lit_type[] = {
                   kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid,
@@ -1497,82 +1480,101 @@
           }
           break;
         }
-      default:      // more formats
-        if ((op2 >> 4) == 2) {      // 010xxxx
-          // data processing (register)
-          if ((instr & 0x0080f0f0) == 0x0000f000) {
-            // LSL, LSR, ASR, ROR
-            uint32_t shift_op = (instr >> 21) & 3;
-            uint32_t S = (instr >> 20) & 1;
-            ArmRegister Rd(instr, 8);
+        case 0x7B: case 0x7F: {
+          FpRegister d(instr, 12, 22);
+          FpRegister m(instr, 0, 5);
+          uint32_t sz = (instr >> 18) & 0x3;  // Decode size bits.
+          uint32_t size = (sz == 0) ? 8 : sz << 4;
+          uint32_t opc2 = (instr >> 7) & 0xF;
+          uint32_t Q = (instr >> 6) & 1;
+          if (Q == 0 && opc2 == 0xA && size == 8) {  // 1010, VCNT
+            opcode << "vcnt." << size;
+            args << d << ", " << m;
+          } else if (Q == 0 && (opc2 == 0x4 || opc2 == 0x5) && size <= 32) {  // 010x, VPADDL
+            bool op = HasBitSet(instr, 7);
+            opcode << "vpaddl." << (op ? "u" : "s") << size;
+            args << d << ", " << m;
+          } else {
+            opcode << "UNKNOWN " << op2;
+          }
+          break;
+        }
+        default:      // more formats
+          if ((op2 >> 4) == 2) {      // 010xxxx
+            // data processing (register)
+            if ((instr & 0x0080f0f0) == 0x0000f000) {
+              // LSL, LSR, ASR, ROR
+              uint32_t shift_op = (instr >> 21) & 3;
+              uint32_t S = (instr >> 20) & 1;
+              ArmRegister Rd(instr, 8);
+              ArmRegister Rn(instr, 16);
+              ArmRegister Rm(instr, 0);
+              opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
+              args << Rd << ", " << Rn << ", " << Rm;
+            }
+          } else if ((op2 >> 3) == 6) {       // 0110xxx
+            // Multiply, multiply accumulate, and absolute difference
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0x1;
+            ArmRegister Ra(instr, 12);
             ArmRegister Rn(instr, 16);
             ArmRegister Rm(instr, 0);
-            opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
-            args << Rd << ", " << Rn << ", " << Rm;
-          }
-        } else if ((op2 >> 3) == 6) {       // 0110xxx
-          // Multiply, multiply accumulate, and absolute difference
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0x1;
-          ArmRegister Ra(instr, 12);
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          switch (op1) {
-          case 0:
-            if (op2 == 0) {
-              if (Ra.r == 0xf) {
-                opcode << "mul";
-                args << Rd << ", " << Rn << ", " << Rm;
+            ArmRegister Rd(instr, 8);
+            switch (op1) {
+            case 0:
+              if (op2 == 0) {
+                if (Ra.r == 0xf) {
+                  opcode << "mul";
+                  args << Rd << ", " << Rn << ", " << Rm;
+                } else {
+                  opcode << "mla";
+                  args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+                }
               } else {
-                opcode << "mla";
+                opcode << "mls";
                 args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
               }
-            } else {
-              opcode << "mls";
-              args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+              break;
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;        // do these sometime
             }
-            break;
-          case 1:
-          case 2:
-          case 3:
-          case 4:
-          case 5:
-          case 6:
-              break;        // do these sometime
+          } else if ((op2 >> 3) == 7) {       // 0111xxx
+            // Long multiply, long multiply accumulate, and divide
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0xf;
+            ArmRegister Rn(instr, 16);
+            ArmRegister Rm(instr, 0);
+            ArmRegister Rd(instr, 8);
+            ArmRegister RdHi(instr, 8);
+            ArmRegister RdLo(instr, 12);
+            switch (op1) {
+            case 0:
+              opcode << "smull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 1:
+              opcode << "sdiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 2:
+              opcode << "umull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 3:
+              opcode << "udiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 4:
+            case 5:
+            case 6:
+              break;      // TODO: when we generate these...
+            }
           }
-        } else if ((op2 >> 3) == 7) {       // 0111xxx
-          // Long multiply, long multiply accumulate, and divide
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0xf;
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          ArmRegister RdHi(instr, 8);
-          ArmRegister RdLo(instr, 12);
-          switch (op1) {
-          case 0:
-            opcode << "smull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 1:
-            opcode << "sdiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 2:
-            opcode << "umull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 3:
-            opcode << "udiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 4:
-          case 5:
-          case 6:
-            break;      // TODO: when we generate these...
-          }
-        }
       }
       break;
     default:
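
A standalone sketch of the consolidated Thumb-2 shift-operand formatting above: a table lookup plus a single rrx special case replaces the per-type switch (same logic as the hunk, not the ART class):

    #include <cstdint>
    #include <sstream>
    #include <string>

    static const char* const kShiftOps[] = { "lsl", "lsr", "asr", "ror" };

    std::string FormatShift(uint32_t shift_type, uint32_t imm5) {
      std::ostringstream args;
      bool no_shift = (imm5 == 0 && shift_type == 0x0);  // lsl #0 is a no-op
      if (!no_shift) {
        args << ", ";
        if (shift_type == 0x3u && imm5 == 0u) {
          args << "rrx";  // ror with a zero immediate encodes rrx
        } else {
          args << kShiftOps[shift_type] << " #" << ((imm5 != 0) ? imm5 : 32);
        }
      }
      return args.str();
    }

For example, FormatShift(0x1, 0) yields ", lsr #32", matching the encoding rule that a zero immediate means a 32-bit shift for lsr/asr.
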
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 6a9afe5..80bacb2 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -22,7 +22,8 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 namespace art {
 namespace arm64 {
@@ -38,15 +39,14 @@
   LR  = 30
 };
 
-void CustomDisassembler::AppendRegisterNameToOutput(
-    const vixl::Instruction* instr,
-    const vixl::CPURegister& reg) {
+void CustomDisassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                                    const CPURegister& reg) {
   USE(instr);
   if (reg.IsRegister() && reg.Is64Bits()) {
-    if (reg.code() == TR) {
+    if (reg.GetCode() == TR) {
       AppendToOutput("tr");
       return;
-    } else if (reg.code() == LR) {
+    } else if (reg.GetCode() == LR) {
       AppendToOutput("lr");
       return;
     }
@@ -56,7 +56,7 @@
   Disassembler::AppendRegisterNameToOutput(instr, reg);
 }
 
-void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadLiteral(const Instruction* instr) {
   Disassembler::VisitLoadLiteral(instr);
 
   if (!read_literals_) {
@@ -66,27 +66,27 @@
   // Get address of literal. Bail if not within expected buffer range to
   // avoid trying to fetch invalid literals (we can encounter this when
   // interpreting raw data as instructions).
-  void* data_address = instr->LiteralAddress<void*>();
+  void* data_address = instr->GetLiteralAddress<void*>();
   if (data_address < base_address_ || data_address >= end_address_) {
     AppendToOutput(" (?)");
     return;
   }
 
   // Output information on literal.
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  Instr op = instr->Mask(LoadLiteralMask);
   switch (op) {
-    case vixl::LDR_w_lit:
-    case vixl::LDR_x_lit:
-    case vixl::LDRSW_x_lit: {
-      int64_t data = op == vixl::LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
-                                           : *reinterpret_cast<int32_t*>(data_address);
+    case LDR_w_lit:
+    case LDR_x_lit:
+    case LDRSW_x_lit: {
+      int64_t data = op == LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
+                                     : *reinterpret_cast<int32_t*>(data_address);
       AppendToOutput(" (0x%" PRIx64 " / %" PRId64 ")", data, data);
       break;
     }
-    case vixl::LDR_s_lit:
-    case vixl::LDR_d_lit: {
-      double data = (op == vixl::LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
-                                            : *reinterpret_cast<double*>(data_address);
+    case LDR_s_lit:
+    case LDR_d_lit: {
+      double data = (op == LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
+                                      : *reinterpret_cast<double*>(data_address);
       AppendToOutput(" (%g)", data);
       break;
     }
@@ -95,27 +95,27 @@
   }
 }
 
-void CustomDisassembler::VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   Disassembler::VisitLoadStoreUnsignedOffset(instr);
 
-  if (instr->Rn() == TR) {
-    int64_t offset = instr->ImmLSUnsigned() << instr->SizeLS();
+  if (instr->GetRn() == TR) {
+    int64_t offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
     std::ostringstream tmp_stream;
-    Thread::DumpThreadOffset<8>(tmp_stream, static_cast<uint32_t>(offset));
+    options_->thread_offset_name_function_(tmp_stream, static_cast<uint32_t>(offset));
     AppendToOutput(" ; %s", tmp_stream.str().c_str());
   }
 }
 
 size_t DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin) {
-  const vixl::Instruction* instr = reinterpret_cast<const vixl::Instruction*>(begin);
+  const Instruction* instr = reinterpret_cast<const Instruction*>(begin);
   decoder.Decode(instr);
     os << FormatInstructionPointer(begin)
-     << StringPrintf(": %08x\t%s\n", instr->InstructionBits(), disasm.GetOutput());
-  return vixl::kInstructionSize;
+     << StringPrintf(": %08x\t%s\n", instr->GetInstructionBits(), disasm.GetOutput());
+  return kInstructionSize;
 }
 
 void DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) {
-  for (const uint8_t* cur = begin; cur < end; cur += vixl::kInstructionSize) {
+  for (const uint8_t* cur = begin; cur < end; cur += kInstructionSize) {
     Dump(os, cur);
   }
 }
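
The VisitLoadLiteral logic kept through the VIXL rename only dereferences a literal when its address falls inside the mapped code range; otherwise it prints "(?)". A standalone sketch of that guard (struct and function names are illustrative):

    #include <cstdint>

    struct LiteralRange {
      const void* base;  // inclusive
      const void* end;   // exclusive
    };

    // Returns true (and the value) only when the literal lies inside the range;
    // raw data interpreted as instructions can otherwise point anywhere.
    bool ReadWordLiteral(const LiteralRange& r, const void* data_address, int64_t* out) {
      if (data_address < r.base || data_address >= r.end) {
        return false;
      }
      *out = *reinterpret_cast<const int32_t*>(data_address);
      return true;
    }
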
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index a4e5ee8..19e4dfb 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -19,36 +19,39 @@
 
 #include "disassembler.h"
 
+// TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
-#include "vixl/a64/decoder-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "aarch64/decoder-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-class CustomDisassembler FINAL : public vixl::Disassembler {
+class CustomDisassembler FINAL : public vixl::aarch64::Disassembler {
  public:
   explicit CustomDisassembler(DisassemblerOptions* options)
-      : vixl::Disassembler(),
+      : vixl::aarch64::Disassembler(),
         read_literals_(options->can_read_literals_),
         base_address_(options->base_address_),
-        end_address_(options->end_address_) {
+        end_address_(options->end_address_),
+        options_(options) {
     if (!options->absolute_addresses_) {
-      MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
+      MapCodeAddress(0,
+                     reinterpret_cast<const vixl::aarch64::Instruction*>(options->base_address_));
     }
   }
 
   // Use register aliases in the disassembly.
-  void AppendRegisterNameToOutput(const vixl::Instruction* instr,
-                                  const vixl::CPURegister& reg) OVERRIDE;
+  void AppendRegisterNameToOutput(const vixl::aarch64::Instruction* instr,
+                                  const vixl::aarch64::CPURegister& reg) OVERRIDE;
 
   // Improve the disassembly of literal load instructions.
-  void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadLiteral(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
   // Improve the disassembly of thread offset.
-  void VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadStoreUnsignedOffset(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
  private:
   // Indicate if the disassembler should read data loaded from literal pools.
@@ -62,6 +65,8 @@
   // Valid address range: [base_address_, end_address_)
   const void* const base_address_;
   const void* const end_address_;
+
+  DisassemblerOptions* options_;
 };
 
 class DisassemblerArm64 FINAL : public Disassembler {
@@ -75,7 +80,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
-  vixl::Decoder decoder;
+  vixl::aarch64::Decoder decoder;
   CustomDisassembler disasm;
 
   DISALLOW_COPY_AND_ASSIGN(DisassemblerArm64);
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index a95ea64..02c6d71 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -21,7 +21,6 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace mips {
@@ -330,8 +329,10 @@
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 56u << kOpcodeShift, "sc", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kJTypeMask, 58u << kOpcodeShift, "balc", "P" },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
   { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -384,6 +385,8 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 13, "trunc.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 14, "ceil.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x200 << 16) | 17, "movf", "fadc" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x201 << 16) | 17, "movt", "fadc" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 26, "rint", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 27, "class", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" },
@@ -499,15 +502,19 @@
               args << StringPrintf("%+d(r%d)", offset, rs);
               if (rs == 17) {
                 args << "  ; ";
-                if (is64bit_) {
-                  Thread::DumpThreadOffset<8>(args, offset);
-                } else {
-                  Thread::DumpThreadOffset<4>(args, offset);
-                }
+                GetDisassemblerOptions()->thread_offset_name_function_(args, offset);
               }
             }
             break;
-          case 'P':  // 26-bit offset in bc.
+          case 'o':  // 19-bit offset in lwpc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              offset <<= 2;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'P':  // 26-bit offset in bc and balc.
             {
               int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
               offset <<= 2;
@@ -538,6 +545,7 @@
     }
   }
 
+  // TODO: Simplify this once these sequences are simplified in the compiler.
   // Special cases for sequences of:
   //   pc-relative +/- 2GB branch:
   //     auipc  reg, imm
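
The new 'o' (lwpc) format uses the same decoding trick as the existing 'P' (bc/balc) format: keep the low bits of the field, subtract twice the sign bit to sign-extend, then scale by four since the offset is in words. A standalone sketch of the 19-bit case (helper name and main() are illustrative):

    #include <cstdint>
    #include <cstdio>

    int32_t DecodeLwpcOffset(uint32_t instruction) {
      // 0x40000 is the sign bit of the 19-bit field; subtracting it twice
      // sign-extends the value. The patch scales with 'offset <<= 2'.
      int32_t offset = static_cast<int32_t>(instruction & 0x7ffff) -
                       static_cast<int32_t>((instruction & 0x40000) << 1);
      return offset * 4;
    }

    int main() {
      printf("%d\n", DecodeLwpcOffset(0x7ffff));  // all-ones field -> -4 bytes
      return 0;
    }
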
diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h
index b0e49b3..6342f22 100644
--- a/disassembler/disassembler_mips.h
+++ b/disassembler/disassembler_mips.h
@@ -26,9 +26,8 @@
 
 class DisassemblerMips FINAL : public Disassembler {
  public:
-  DisassemblerMips(DisassemblerOptions* options, bool is64bit)
+  explicit DisassemblerMips(DisassemblerOptions* options)
       : Disassembler(options),
-        is64bit_(is64bit),
         last_ptr_(nullptr),
         last_instr_(0) {}
 
@@ -36,8 +35,6 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
-  const bool is64bit_;
-
   // Address and encoding of the last disassembled instruction.
   // Needed to produce more readable disassembly of certain 2-instruction sequences.
   const uint8_t* last_ptr_;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 1f74c93..2ca84e5 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -23,7 +23,6 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
-#include "thread.h"
 
 namespace art {
 namespace x86 {
@@ -243,7 +242,38 @@
   return address.str();
 }
 
+size_t DisassemblerX86::DumpNops(std::ostream& os, const uint8_t* instr) {
+  static constexpr uint8_t kNops[][10] = {
+      { },
+      { 0x90 },
+      { 0x66, 0x90 },
+      { 0x0f, 0x1f, 0x00 },
+      { 0x0f, 0x1f, 0x40, 0x00 },
+      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
+  };
+
+  for (size_t i = 1; i < arraysize(kNops); ++i) {
+    if (memcmp(instr, kNops[i], i) == 0) {
+      os << FormatInstructionPointer(instr)
+         << StringPrintf(": %22s    \t       nop \n", DumpCodeHex(instr, instr + i).c_str());
+      return i;
+    }
+  }
+
+  return 0;
+}
+
 size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) {
+  size_t nop_size = DumpNops(os, instr);
+  if (nop_size != 0u) {
+    return nop_size;
+  }
+
   const uint8_t* begin_instr = instr;
   bool have_prefixes = true;
   uint8_t prefix[4] = {0, 0, 0, 0};
@@ -400,6 +430,7 @@
   case 0x89: opcode1 = "mov"; store = true; has_modrm = true; break;
   case 0x8A: opcode1 = "mov"; load = true; has_modrm = true; byte_operand = true; break;
   case 0x8B: opcode1 = "mov"; load = true; has_modrm = true; break;
+  case 0x9D: opcode1 = "popf"; break;
 
   case 0x0F:  // 2 byte extended opcode
     instr++;
@@ -565,7 +596,7 @@
               opcode1 = "roundss";
               prefix[2] = 0;
               has_modrm = true;
-              store = true;
+              load = true;
               src_reg_file = SSE;
               dst_reg_file = SSE;
               immediate_bytes = 1;
@@ -574,7 +605,7 @@
               opcode1 = "roundsd";
               prefix[2] = 0;
               has_modrm = true;
-              store = true;
+              load = true;
               src_reg_file = SSE;
               dst_reg_file = SSE;
               immediate_bytes = 1;
@@ -1377,11 +1408,11 @@
   }
   if (prefix[1] == kFs && !supports_rex_) {
     args << "  ; ";
-    Thread::DumpThreadOffset<4>(args, address_bits);
+    GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits);
   }
   if (prefix[1] == kGs && supports_rex_) {
     args << "  ; ";
-    Thread::DumpThreadOffset<8>(args, address_bits);
+    GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits);
   }
   const char* prefix_str;
   switch (prefix[0]) {
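
The DumpNops addition compares the byte stream against a table of 1- to 10-byte NOP encodings and reports how many bytes were consumed, letting DumpInstruction emit a single "nop" line instead of decoding each filler byte. A standalone sketch trimmed to a few table entries (it assumes, like the original, that a few bytes past instr are readable):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static constexpr uint8_t kNops[][10] = {
        { },
        { 0x90 },
        { 0x66, 0x90 },
        { 0x0f, 0x1f, 0x00 },
        { 0x0f, 0x1f, 0x40, 0x00 },
    };

    size_t MatchNop(const uint8_t* instr) {
      for (size_t i = 1; i < sizeof(kNops) / sizeof(kNops[0]); ++i) {
        if (memcmp(instr, kNops[i], i) == 0) {
          return i;  // caller prints one "nop" covering i bytes
        }
      }
      return 0;  // fall through to normal instruction decoding
    }
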
diff --git a/disassembler/disassembler_x86.h b/disassembler/disassembler_x86.h
index 71c3e41..31b62bc 100644
--- a/disassembler/disassembler_x86.h
+++ b/disassembler/disassembler_x86.h
@@ -33,6 +33,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
+  size_t DumpNops(std::ostream& os, const uint8_t* instr);
   size_t DumpInstruction(std::ostream& os, const uint8_t* instr);
 
   std::string DumpAddress(uint8_t mod, uint8_t rm, uint8_t rex64, uint8_t rex_w, bool no_ops,
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 214222d..21a0ca0 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -285,7 +285,7 @@
                         const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
-    const size_t pointer_size = InstructionSetPointerSize(
+    const PointerSize pointer_size = InstructionSetPointerSize(
         Runtime::Current()->GetInstructionSet());
 
     std::string file_name =
@@ -729,7 +729,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -810,7 +810,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
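
The pointer_size changes in this file and the following ones replace a raw size_t with a scoped PointerSize enum, so a plain byte count can no longer be passed where a pointer width is expected. A standalone sketch of the idea (an approximation, not the ART base/enums.h definitions):

    #include <cstddef>

    enum class PointerSize : size_t { k32 = 4, k64 = 8 };

    constexpr size_t AsBytes(PointerSize size) {
      return static_cast<size_t>(size);
    }

    // A pointer-size-dependent slot offset, as the method/field tables compute.
    constexpr size_t SlotOffset(size_t index, PointerSize size) {
      return index * AsBytes(size);
    }

    static_assert(SlotOffset(3, PointerSize::k64) == 24, "three 8-byte slots");
    // SlotOffset(3, 8) no longer compiles; callers must name PointerSize::k64.
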
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index aa4635d..96c8e94 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -35,7 +35,7 @@
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "disassembler.h"
 #include "elf_builder.h"
 #include "gc/space/image_space.h"
@@ -56,8 +56,9 @@
 #include "os.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "stack_map.h"
 #include "ScopedLocalRef.h"
+#include "stack_map.h"
+#include "string_reference.h"
 #include "thread_list.h"
 #include "type_lookup_table.h"
 #include "verifier/method_verifier.h"
@@ -72,9 +73,10 @@
   "kResolutionMethod",
   "kImtConflictMethod",
   "kImtUnimplementedMethod",
-  "kCalleeSaveMethod",
-  "kRefsOnlySaveMethod",
-  "kRefsAndArgsSaveMethod",
+  "kSaveAllCalleeSavesMethod",
+  "kSaveRefsOnlyMethod",
+  "kSaveRefsAndArgsMethod",
+  "kSaveEverythingMethod",
 };
 
 const char* image_roots_descriptions_[] = {
@@ -333,10 +335,14 @@
       resolved_addr2instr_(0),
       instruction_set_(oat_file_.GetOatHeader().GetInstructionSet()),
       disassembler_(Disassembler::Create(instruction_set_,
-                                         new DisassemblerOptions(options_.absolute_addresses_,
-                                                                 oat_file.Begin(),
-                                                                 oat_file.End(),
-                                                                 true /* can_read_literals_ */))) {
+                                         new DisassemblerOptions(
+                                             options_.absolute_addresses_,
+                                             oat_file.Begin(),
+                                             oat_file.End(),
+                                             true /* can_read_literals_ */,
+                                             Is64BitInstructionSet(instruction_set_)
+                                                 ? &Thread::DumpThreadOffset<PointerSize::k64>
+                                                 : &Thread::DumpThreadOffset<PointerSize::k32>))) {
     CHECK(options_.class_loader_ != nullptr);
     CHECK(options_.class_filter_ != nullptr);
     CHECK(options_.method_filter_ != nullptr);
@@ -447,6 +453,28 @@
       os << StringPrintf("0x%08x\n\n", resolved_addr2instr_);
     }
 
+    // Dumping the dex file overview is compact enough to do even if header only.
+    DexFileData cumulative;
+    for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+      const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
+      CHECK(oat_dex_file != nullptr);
+      std::string error_msg;
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
+        os << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation() << "': "
+           << error_msg;
+        continue;
+      }
+      DexFileData data(*dex_file);
+      os << "Dex file data for " << dex_file->GetLocation() << "\n";
+      data.Dump(os);
+      os << "\n";
+      cumulative.Add(data);
+    }
+    os << "Cumulative dex file data\n";
+    cumulative.Dump(os);
+    os << "\n";
+
     if (!options_.dump_header_only_) {
       for (size_t i = 0; i < oat_dex_files_.size(); i++) {
         const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -568,6 +596,122 @@
     offsets_.insert(oat_method.GetVmapTableOffset());
   }
 
+  // Dex file data, may be for multiple different dex files.
+  class DexFileData {
+   public:
+    DexFileData() {}
+
+    explicit DexFileData(const DexFile& dex_file)
+        : num_string_ids_(dex_file.NumStringIds()),
+          num_method_ids_(dex_file.NumMethodIds()),
+          num_field_ids_(dex_file.NumFieldIds()),
+          num_type_ids_(dex_file.NumTypeIds()),
+          num_class_defs_(dex_file.NumClassDefs()) {
+      for (size_t class_def_index = 0; class_def_index < num_class_defs_; ++class_def_index) {
+        const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
+        WalkClass(dex_file, class_def);
+      }
+    }
+
+    void Add(const DexFileData& other) {
+      AddAll(unique_string_ids_from_code_, other.unique_string_ids_from_code_);
+      num_string_ids_from_code_ += other.num_string_ids_from_code_;
+      AddAll(dex_code_item_ptrs_, other.dex_code_item_ptrs_);
+      dex_code_bytes_ += other.dex_code_bytes_;
+      num_string_ids_ += other.num_string_ids_;
+      num_method_ids_ += other.num_method_ids_;
+      num_field_ids_ += other.num_field_ids_;
+      num_type_ids_ += other.num_type_ids_;
+      num_class_defs_ += other.num_class_defs_;
+    }
+
+    void Dump(std::ostream& os) {
+      os << "Num string ids: " << num_string_ids_ << "\n";
+      os << "Num method ids: " << num_method_ids_ << "\n";
+      os << "Num field ids: " << num_field_ids_ << "\n";
+      os << "Num type ids: " << num_type_ids_ << "\n";
+      os << "Num class defs: " << num_class_defs_ << "\n";
+      os << "Unique strings loaded from dex code: " << unique_string_ids_from_code_.size() << "\n";
+      os << "Total strings loaded from dex code: " << num_string_ids_from_code_ << "\n";
+      os << "Number of unique dex code items: " << dex_code_item_ptrs_.size() << "\n";
+      os << "Total number of dex code bytes: " << dex_code_bytes_ << "\n";
+    }
+
+   private:
+    void WalkClass(const DexFile& dex_file, const DexFile::ClassDef& class_def) {
+      const uint8_t* class_data = dex_file.GetClassData(class_def);
+      if (class_data == nullptr) {  // empty class such as a marker interface?
+        return;
+      }
+      ClassDataItemIterator it(dex_file, class_data);
+      SkipAllFields(it);
+      while (it.HasNextDirectMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      while (it.HasNextVirtualMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+
+    void WalkCodeItem(const DexFile& dex_file, const DexFile::CodeItem* code_item) {
+      if (code_item == nullptr) {
+        return;
+      }
+      const size_t code_item_size = code_item->insns_size_in_code_units_;
+      const uint16_t* code_ptr = code_item->insns_;
+      const uint16_t* code_end = code_item->insns_ + code_item_size;
+
+      // If we inserted a new dex code item pointer, add to total code bytes.
+      if (dex_code_item_ptrs_.insert(code_ptr).second) {
+        dex_code_bytes_ += code_item_size * sizeof(code_ptr[0]);
+      }
+
+      while (code_ptr < code_end) {
+        const Instruction* inst = Instruction::At(code_ptr);
+        switch (inst->Opcode()) {
+          case Instruction::CONST_STRING: {
+            const uint32_t string_index = inst->VRegB_21c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          case Instruction::CONST_STRING_JUMBO: {
+            const uint32_t string_index = inst->VRegB_31c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          default:
+            break;
+        }
+
+        code_ptr += inst->SizeInCodeUnits();
+      }
+    }
+
+    // Unique string ids loaded from dex code.
+    std::set<StringReference, StringReferenceComparator> unique_string_ids_from_code_;
+
+    // Total string ids loaded from dex code.
+    size_t num_string_ids_from_code_ = 0;
+
+    // Unique code pointers.
+    std::set<const void*> dex_code_item_ptrs_;
+
+    // Total "unique" dex code bytes.
+    size_t dex_code_bytes_ = 0;
+
+    // Other dex ids.
+    size_t num_string_ids_ = 0;
+    size_t num_method_ids_ = 0;
+    size_t num_field_ids_ = 0;
+    size_t num_type_ids_ = 0;
+    size_t num_class_defs_ = 0;
+  };
+
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
     bool success = true;
     bool stop_analysis = false;
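
DexFileData distinguishes "Unique strings loaded from dex code" from the total by inserting every const-string reference into a set while also bumping a counter. A standalone sketch of that accounting, with a plain pair standing in for ART's StringReference:

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
      // (dex file tag, string index) pairs, as if walked out of code items.
      std::vector<std::pair<int, uint32_t>> loads = {{0, 7}, {0, 7}, {0, 9}, {1, 7}};
      std::set<std::pair<int, uint32_t>> unique;
      size_t total = 0;
      for (const auto& ref : loads) {
        unique.insert(ref);  // only grows for ids not seen before
        ++total;
      }
      printf("Unique strings loaded from dex code: %zu\n", unique.size());  // 3
      printf("Total strings loaded from dex code: %zu\n", total);           // 4
      return 0;
    }
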
@@ -622,7 +766,9 @@
          << " (" << oat_class.GetStatus() << ")"
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
-      if (options_.list_classes_) continue;
+      if (options_.list_classes_) {
+        continue;
+      }
       if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
         success = false;
       }
@@ -631,7 +777,7 @@
         return success;
       }
     }
-
+    os << "\n";
     os << std::flush;
     return success;
   }
@@ -1260,7 +1406,7 @@
   const std::vector<const OatFile::OatDexFile*> oat_dex_files_;
   const OatDumperOptions& options_;
   uint32_t resolved_addr2instr_;
-  InstructionSet instruction_set_;
+  const InstructionSet instruction_set_;
   std::set<uintptr_t> offsets_;
   Disassembler* disassembler_;
 };
@@ -1682,7 +1828,7 @@
     }
     ScopedIndentation indent1(&state->vios_);
     DumpFields(os, obj, obj_class);
-    const size_t image_pointer_size = state->image_header_.GetPointerSize();
+    const PointerSize image_pointer_size = state->image_header_.GetPointerSize();
     if (obj->IsObjectArray()) {
       auto* obj_array = obj->AsObjectArray<mirror::Object>();
       for (int32_t i = 0, length = obj_array->GetLength(); i < length; i++) {
@@ -1723,7 +1869,7 @@
         const auto& method_section = state->image_header_.GetMethodsSection();
         size_t num_methods = dex_cache->NumResolvedMethods();
         if (num_methods != 0u) {
-          os << "Methods (size=" << num_methods << "):";
+          os << "Methods (size=" << num_methods << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_methods = dex_cache->GetResolvedMethods();
           for (size_t i = 0, length = dex_cache->NumResolvedMethods(); i < length; ++i) {
@@ -1732,10 +1878,12 @@
                                                              image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1756,17 +1904,20 @@
         }
         size_t num_fields = dex_cache->NumResolvedFields();
         if (num_fields != 0u) {
-          os << "Fields (size=" << num_fields << "):";
+          os << "Fields (size=" << num_fields << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_fields = dex_cache->GetResolvedFields();
           for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(resolved_fields, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetElementPtrSize(
+                resolved_fields, i, image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1785,6 +1936,32 @@
             os << StringPrintf("%p   %s\n", elem, msg.c_str());
           }
         }
+        size_t num_types = dex_cache->NumResolvedTypes();
+        if (num_types != 0u) {
+          os << "Types (size=" << num_types << "):\n";
+          ScopedIndentation indent2(&state->vios_);
+          auto* resolved_types = dex_cache->GetResolvedTypes();
+          for (size_t i = 0; i < num_types; ++i) {
+            auto* elem = resolved_types[i].Read();
+            size_t run = 0;
+            for (size_t j = i + 1; j != num_types && elem == resolved_types[j].Read(); ++j) {
+              ++run;
+            }
+            if (run == 0) {
+              os << StringPrintf("%zd: ", i);
+            } else {
+              os << StringPrintf("%zd to %zd: ", i, i + run);
+              i = i + run;
+            }
+            std::string msg;
+            if (elem == nullptr) {
+              msg = "null";
+            } else {
+              msg = PrettyClass(elem);
+            }
+            os << StringPrintf("%p   %s\n", elem, msg.c_str());
+          }
+        }
       }
     }
     std::string temp;
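
The dex-cache dumping loops above collapse runs of identical entries into one "i to i+run:" line; the rewrite moves the ++run into the loop body instead of the for-clause. A standalone sketch of that run-length grouping:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<const void*> slots = {nullptr, nullptr, nullptr, &slots, nullptr};
      for (size_t i = 0; i < slots.size(); ++i) {
        const void* elem = slots[i];
        size_t run = 0;
        for (size_t j = i + 1; j != slots.size() && elem == slots[j]; ++j) {
          ++run;
        }
        if (run == 0) {
          printf("%zd: %p\n", i, elem);
        } else {
          printf("%zd to %zd: %p\n", i, i + run, elem);
          i += run;  // skip the slots already covered by the run
        }
      }
      return 0;
    }
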
@@ -1796,7 +1973,7 @@
     DCHECK(method != nullptr);
     const void* quick_oat_code_begin = GetQuickOatCodeBegin(method);
     const void* quick_oat_code_end = GetQuickOatCodeEnd(method);
-    const size_t pointer_size = image_header_.GetPointerSize();
+    const PointerSize pointer_size = image_header_.GetPointerSize();
     OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(
         reinterpret_cast<uintptr_t>(quick_oat_code_begin) - sizeof(OatQuickMethodHeader));
     if (method->IsNative()) {
@@ -1978,7 +2155,7 @@
       size_t sum_of_expansion = 0;
       size_t sum_of_expansion_squared = 0;
       size_t n = method_outlier_size.size();
-      if (n == 0) {
+      if (n <= 1) {
         return;
       }
       for (size_t i = 0; i < n; i++) {
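
An assumption about the guard moving from n == 0 to n <= 1: the outlier statistics only make sense with at least two samples, and a sample-variance style n - 1 denominator would otherwise divide by zero. A standalone sketch under that assumption (not the oatdump code):

    #include <cstddef>
    #include <vector>

    bool ExpansionStats(const std::vector<size_t>& sizes, double* mean, double* variance) {
      const size_t n = sizes.size();
      if (n <= 1) {
        return false;  // nothing meaningful to report for zero or one method
      }
      size_t sum = 0;
      size_t sum_squared = 0;
      for (size_t v : sizes) {
        sum += v;
        sum_squared += v * v;
      }
      *mean = static_cast<double>(sum) / n;
      *variance = (sum_squared - n * (*mean) * (*mean)) / (n - 1);
      return true;
    }
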
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index c7ced8a..db97055 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
 #include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
 #include "runtime/arch/instruction_set.h"
 #include "runtime/gc/heap.h"
 #include "runtime/gc/space/image_space.h"
@@ -58,26 +59,141 @@
   };
 
   // Run the test with custom arguments.
-  bool Exec(Mode mode, const std::vector<std::string>& args, std::string* error_msg) {
+  bool Exec(Mode mode,
+            const std::vector<std::string>& args,
+            bool list_only,
+            std::string* error_msg) {
     std::string file_path = GetOatDumpFilePath();
 
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
 
+    // ScratchFile scratch;
     std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
     if (mode == kModeSymbolize) {
       exec_argv.push_back("--symbolize=" + core_oat_location_);
       exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else if (mode == kModeArt) {
-      exec_argv.push_back("--image=" + core_art_location_);
-      exec_argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
-      exec_argv.push_back("--output=/dev/null");
     } else {
-      CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-      exec_argv.push_back("--oat-file=" + core_oat_location_);
-      exec_argv.push_back("--output=/dev/null");
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (!list_only) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
     }
     exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-    return ::art::Exec(exec_argv, error_msg);
+
+    bool result = true;
+    // We must set --android-root.
+    int link[2];
+    if (pipe(link) == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      // change process groups, so we don't get reaped by ProcessManager
+      setpgid(0, 0);
+      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
+      std::vector<char*> argv;
+      for (size_t i = 0; i < exec_argv.size(); ++i) {
+        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
+      }
+      argv.push_back(nullptr);
+      UNUSED(execv(argv[0], &argv[0]));
+      const std::string command_line(Join(exec_argv, ' '));
+      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
+      // _exit to avoid atexit handlers in child.
+      _exit(1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
   }
 
  private:
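
A simpler sketch of the prefix-checking idea in the rewritten Exec(): run a command, capture its stdout, and require that every expected prefix starts some line of the output. It uses popen for brevity instead of the explicit fork/pipe/dup2 above; the buffer size and trimming are illustrative:

    #include <cstdio>
    #include <string>
    #include <vector>

    bool OutputHasPrefixes(const std::string& cmd, const std::vector<std::string>& prefixes) {
      FILE* out = popen(cmd.c_str(), "r");
      if (out == nullptr) {
        return false;
      }
      std::vector<bool> found(prefixes.size(), false);
      char line[256];
      while (fgets(line, sizeof(line), out) != nullptr) {
        std::string s(line);
        // Trim leading whitespace, as the test does before comparing.
        size_t start = s.find_first_not_of(" \t");
        if (start == std::string::npos) continue;
        for (size_t i = 0; i < prefixes.size(); ++i) {
          if (!found[i] && s.compare(start, prefixes[i].size(), prefixes[i]) == 0) {
            found[i] = true;
          }
        }
      }
      pclose(out);
      for (bool f : found) {
        if (!f) return false;
      }
      return true;
    }
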
@@ -89,37 +205,37 @@
 #if !defined(__arm__) && !defined(__mips__)
 TEST_F(OatDumpTest, TestImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestOatImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeOat, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeOat, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDisassemble) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListClasses) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListMethods) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestSymbolize) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeSymbolize, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeSymbolize, {}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 #endif
 }  // namespace art
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 5bb61bb..3f6531b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -37,6 +37,7 @@
 #include "gc/space/image_space.h"
 #include "image-inl.h"
 #include "mirror/abstract_method.h"
+#include "mirror/dex_cache.h"
 #include "mirror/object-inl.h"
 #include "mirror/method.h"
 #include "mirror/reference.h"
@@ -489,13 +490,13 @@
 };
 
 void PatchOat::PatchArtMethods(const ImageHeader* image_header) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   PatchOatArtMethodVisitor visitor(this);
   image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
 }
 
 void PatchOat::PatchImTables(const ImageHeader* image_header) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
   image_header->VisitPackedImTables(
       [this](ArtMethod* method) {
@@ -506,7 +507,7 @@
 }
 
 void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
   image_header->VisitPackedImtConflictTables(
       [this](ArtMethod* method) {
@@ -584,7 +585,7 @@
 void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) {
   auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>(
       img_roots->Get(ImageHeader::kDexCaches));
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   for (size_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
     auto* orig_dex_cache = dex_caches->GetWithoutChecks(i);
     auto* copy_dex_cache = RelocatedCopyOf(orig_dex_cache);
@@ -592,8 +593,8 @@
     // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
     // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
     //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
-    GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
-    GcRoot<mirror::String>* relocated_strings = RelocatedAddressOfPointer(orig_strings);
+    mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings();
+    mirror::StringDexCacheType* relocated_strings = RelocatedAddressOfPointer(orig_strings);
     copy_dex_cache->SetField64<false>(
         mirror::DexCache::StringsOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_strings)));
@@ -705,7 +706,7 @@
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
   if (object->IsClass<kVerifyNone>()) {
-    const size_t pointer_size = InstructionSetPointerSize(isa_);
+    const PointerSize pointer_size = InstructionSetPointerSize(isa_);
     mirror::Class* klass = object->AsClass();
     mirror::Class* copy_klass = down_cast<mirror::Class*>(copy);
     RelocatedPointerVisitor native_visitor(this);
@@ -736,7 +737,7 @@
 }
 
 void PatchOat::FixupMethod(ArtMethod* object, ArtMethod* copy) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  const PointerSize pointer_size = InstructionSetPointerSize(isa_);
   copy->CopyFrom(object, pointer_size);
   // Just update the entry points if it looks like we should.
   // TODO: sanity check all the pointers' values
@@ -748,8 +749,8 @@
   copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
       object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
   // No special handling for IMT conflict table since all pointers are moved by the same offset.
-  copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
-      object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
+  copy->SetDataPtrSize(RelocatedAddressOfPointer(
+      object->GetDataPtrSize(pointer_size)), pointer_size);
 }
 
 bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 61ec695..64efea9d 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -18,6 +18,7 @@
 #define ART_PATCHOAT_PATCHOAT_H_
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "elf_file.h"
@@ -168,7 +169,7 @@
     }
     auto ret = reinterpret_cast<uintptr_t>(obj) + delta_;
     // Trim off high bits in case negative relocation with 64 bit patchoat.
-    if (InstructionSetPointerSize(isa_) == sizeof(uint32_t)) {
+    if (Is32BitISA()) {
       ret = static_cast<uintptr_t>(static_cast<uint32_t>(ret));
     }
     return reinterpret_cast<T*>(ret);
@@ -181,12 +182,16 @@
     }
     T ret = obj + delta_;
     // Trim off high bits in case negative relocation with 64 bit patchoat.
-    if (InstructionSetPointerSize(isa_) == 4) {
+    if (Is32BitISA()) {
       ret = static_cast<T>(static_cast<uint32_t>(ret));
     }
     return ret;
   }
 
+  bool Is32BitISA() const {
+    return InstructionSetPointerSize(isa_) == PointerSize::k32;
+  }
+
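A minimal sketch of the trimming that Is32BitISA() guards, written as a free-standing function with an assumed signature (illustrative only):

#include <cstdint>

// With a negative delta on a 32-bit ISA, only the low 32 bits of the relocated
// value are meaningful, so the result is truncated the same way patchoat does.
uintptr_t RelocateForIsa(uintptr_t address, intptr_t delta, bool is_32bit_isa) {
  uintptr_t ret = address + static_cast<uintptr_t>(delta);
  if (is_32bit_isa) {
    ret = static_cast<uintptr_t>(static_cast<uint32_t>(ret));  // trim high bits
  }
  return ret;
}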
   // Walks through the old image and patches the mmap'd copy of it to the new offset. It does not
   // change the heap.
   class PatchVisitor {
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 462c397..cd0aa6f 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -61,17 +61,21 @@
     ASSERT_TRUE(file_info.Equals(info));
   }
 
-    // Runs test with given arguments.
-  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+  std::string GetProfmanCmd() {
     std::string file_path = GetTestAndroidRoot();
     file_path += "/bin/profman";
     if (kIsDebugBuild) {
       file_path += "d";
     }
-
-    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+    EXPECT_TRUE(OS::FileExists(file_path.c_str()))
+        << file_path << " should be a valid file path";
+    return file_path;
+  }
+  // Runs test with given arguments.
+  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+    std::string profman_cmd = GetProfmanCmd();
     std::vector<std::string> argv_str;
-    argv_str.push_back(file_path);
+    argv_str.push_back(profman_cmd);
     for (size_t k = 0; k < profiles_fd.size(); k++) {
       argv_str.push_back("--profile-file-fd=" + std::to_string(profiles_fd[k]));
     }
@@ -80,6 +84,15 @@
     std::string error;
     return ExecAndReturnCode(argv_str, &error);
   }
+
+  bool GenerateTestProfile(const std::string& filename) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -282,4 +295,15 @@
   CheckProfileInfo(profile1, info1);
 }
 
+TEST_F(ProfileAssistantTest, TestProfileGeneration) {
+  ScratchFile profile;
+  // Generate a test profile.
+  GenerateTestProfile(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 754e431..a5fefa7 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -100,6 +100,14 @@
   UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
   UsageError("      accepts a file descriptor. Cannot be used together with");
   UsageError("      --reference-profile-file.");
+  UsageError("  --generate-test-profile=<filename>: generates a random profile file for testing.");
+  UsageError("  --generate-test-profile-num-dex=<number>: number of dex files that should be");
+  UsageError("      included in the generated profile. Defaults to 20.");
+  UsageError("  --generate-test-profile-method-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of methods that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("");
   UsageError("");
   UsageError("  --dex-location=<string>: location string to use with corresponding");
   UsageError("      apk-fd to find dex files");
@@ -111,12 +119,20 @@
   exit(EXIT_FAILURE);
 }
 
+// Note: make sure you update the Usage if you change these values.
+static constexpr uint16_t kDefaultTestProfileNumDex = 20;
+static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
+static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
+
 class ProfMan FINAL {
  public:
   ProfMan() :
       reference_profile_file_fd_(kInvalidFd),
       dump_only_(false),
       dump_output_to_fd_(kInvalidFd),
+      test_profile_num_dex_(kDefaultTestProfileNumDex),
+      test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
+      test_profile_class_ratio_(kDefaultTestProfileClassRatio),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -159,6 +175,23 @@
         dex_locations_.push_back(option.substr(strlen("--dex-location=")).ToString());
       } else if (option.starts_with("--apk-fd=")) {
         ParseFdForCollection(option, "--apk-fd", &apks_fd_);
+      } else if (option.starts_with("--generate-test-profile=")) {
+        test_profile_ = option.substr(strlen("--generate-test-profile=")).ToString();
+      } else if (option.starts_with("--generate-test-profile-num-dex=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-num-dex",
+                        &test_profile_num_dex_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-method-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-method-ratio",
+                        &test_profile_method_ratio_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-class-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-class-ratio",
+                        &test_profile_class_ratio_,
+                        Usage);
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -168,6 +201,15 @@
     bool has_reference_profile = !reference_profile_file_.empty() ||
         FdIsValid(reference_profile_file_fd_);
 
+    if (!test_profile_.empty()) {
+      if (test_profile_method_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-method-ratio");
+      }
+      if (test_profile_class_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-class-ratio");
+      }
+      return;
+    }
     // --dump-only may be specified with only --reference-profiles present.
     if (!dump_only_ && !has_profiles) {
       Usage("No profile files specified.");
@@ -234,6 +276,7 @@
     MemMap::Init();  // for ZipArchive::OpenFromFd
     std::vector<const DexFile*> dex_files;
     assert(dex_locations_.size() == apks_fd_.size());
+    static constexpr bool kVerifyChecksum = true;
     for (size_t i = 0; i < dex_locations_.size(); ++i) {
       std::string error_msg;
       std::vector<std::unique_ptr<const DexFile>> dex_files_for_location;
@@ -246,6 +289,7 @@
       }
       if (DexFile::OpenFromZip(*zip_archive,
                                dex_locations_[i],
+                               kVerifyChecksum,
                                &error_msg,
                                &dex_files_for_location)) {
       } else {
@@ -315,6 +359,25 @@
     return dump_only_;
   }
 
+  int GenerateTestProfile() {
+    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    if (profile_test_fd < 0) {
+      std::cerr << "Cannot open " << test_profile_ << ": " << strerror(errno) << "\n";
+      return -1;
+    }
+
+    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                             test_profile_num_dex_,
+                                                             test_profile_method_ratio_,
+                                                             test_profile_class_ratio_);
+    close(profile_test_fd);  // ignore close result.
+    return result ? 0 : -1;
+  }
+
+  bool ShouldGenerateTestProfile() {
+    return !test_profile_.empty();
+  }
+
  private:
   static void ParseFdForCollection(const StringPiece& option,
                                    const char* arg_name,
@@ -348,6 +411,10 @@
   int reference_profile_file_fd_;
   bool dump_only_;
   int dump_output_to_fd_;
+  std::string test_profile_;
+  uint16_t test_profile_num_dex_;
+  uint16_t test_profile_method_ratio_;
+  uint16_t test_profile_class_ratio_;
   uint64_t start_ns_;
 };
 
@@ -358,6 +425,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  if (profman.ShouldGenerateTestProfile()) {
+    return profman.GenerateTestProfile();
+  }
   if (profman.ShouldOnlyDumpProfile()) {
     return profman.DumpProfileInfo();
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index aa12c83..0e50eeb 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -109,11 +109,6 @@
   jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   jit/profile_saver.cc  \
-  lambda/art_lambda_method.cc \
-  lambda/box_table.cc \
-  lambda/closure.cc \
-  lambda/closure_builder.cc \
-  lambda/leaking_allocator.cc \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
@@ -132,6 +127,7 @@
   mirror/throwable.cc \
   monitor.cc \
   native_bridge_art_interface.cc \
+  native_stack_dump.cc \
   native/dalvik_system_DexFile.cc \
   native/dalvik_system_VMDebug.cc \
   native/dalvik_system_VMRuntime.cc \
@@ -168,8 +164,8 @@
   offsets.cc \
   os_linux.cc \
   parsed_options.cc \
+  plugin.cc \
   primitive.cc \
-  profiler.cc \
   quick_exception_handler.cc \
   quick/inline_method_analyser.cc \
   reference_table.cc \
@@ -182,6 +178,7 @@
   thread.cc \
   thread_list.cc \
   thread_pool.cc \
+  ti/agent.cc \
   trace.cc \
   transaction.cc \
   type_lookup_table.cc \
@@ -343,6 +340,7 @@
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
   arch/instruction_set.h \
   base/allocator.h \
+  base/enums.h \
   base/mutex.h \
   debugger.h \
   base/unix_file/fd_file.h \
@@ -369,48 +367,26 @@
   oat.h \
   object_callbacks.h \
   process_state.h \
-  profiler_options.h \
   quick/inline_method_analyser.h \
   runtime.h \
   stack.h \
   thread.h \
   thread_state.h \
+  ti/agent.h \
   verifier/method_verifier.h
 
 LIBOPENJDKJVM_SRC_FILES := openjdkjvm/OpenjdkJvm.cc
+LIBOPENJDKJVMTI_SRC_FILES := openjdkjvmti/OpenjdkJvmTi.cc
 
 LIBART_CFLAGS := -DBUILDING_LIBART=1
 
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
 
-# Default dex2oat instruction set features.
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES := default
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-ifeq ($(DEX2OAT_TARGET_ARCH),arm)
-  ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
-  else
-    ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
-      LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
-    endif
-  endif
-endif
-ifeq ($(2ND_DEX2OAT_TARGET_ARCH),arm)
-  ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
-  else
-    ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
-      2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
-    endif
-  endif
-endif
-
 # $(1): target or host
 # $(2): ndebug or debug
 # $(3): static or shared (note that static only applies for host)
-# $(4): module name : either libart or libopenjdkjvm
+# $(4): module name : either libart, libopenjdkjvm, or libopenjdkjvmti
 define build-runtime-library
   ifneq ($(1),target)
     ifneq ($(1),host)
@@ -424,7 +400,9 @@
   endif
   ifneq ($(4),libart)
     ifneq ($(4),libopenjdkjvm)
-      $$(error expected libart of libopenjdkjvm for argument 4, received $(4))
+      ifneq ($(4),libopenjdkjvmti)
+        $$(error expected libart, libopenjdkjvmti, or libopenjdkjvm for argument 4, received $(4))
+      endif
     endif
   endif
 
@@ -462,8 +440,12 @@
       LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
       LOCAL_IS_HOST_MODULE := true
     endif
-  else # libopenjdkjvm
-    LOCAL_SRC_FILES := $$(LIBOPENJDKJVM_SRC_FILES)
+  else
+    ifeq ($(4),libopenjdkjvmti)
+      LOCAL_SRC_FILES := $$(LIBOPENJDKJVMTI_SRC_FILES)
+    else # libopenjdkjvm
+      LOCAL_SRC_FILES := $$(LIBOPENJDKJVM_SRC_FILES)
+    endif
     ifeq ($$(art_target_or_host),host)
       LOCAL_IS_HOST_MODULE := true
     endif
@@ -499,26 +481,24 @@
 
   # Clang usage
   ifeq ($$(art_target_or_host),target)
-    $$(eval $$(call set-target-local-clang-vars))
+    $$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
-    LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-    LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
-    LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
+    LOCAL_ASFLAGS_arm += -no-integrated-as
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
-    LOCAL_LDLIBS := $$(ART_HOST_LDLIBS)
     LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($$(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS)
-    LOCAL_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
 
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $$(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $$(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_MULTILIB := both
   endif
@@ -571,6 +551,15 @@
       LOCAL_SHARED_LIBRARIES += libartd
     endif
     LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvm/NOTICE
+  else
+    ifeq ($(4),libopenjdkjvmti)
+      ifeq ($$(art_ndebug_or_debug),ndebug)
+        LOCAL_SHARED_LIBRARIES += libart
+      else
+        LOCAL_SHARED_LIBRARIES += libartd
+      endif
+      LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvmti/NOTICE
+    endif
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
@@ -610,17 +599,21 @@
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-runtime-library,host,ndebug,shared,libart))
   $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvmti))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-runtime-library,host,ndebug,static,libart))
     $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvm))
+    $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvmti))
   endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-runtime-library,host,debug,shared,libart))
   $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvmti))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-runtime-library,host,debug,static,libart))
     $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvm))
+    $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvmti))
   endif
 endif
 
@@ -628,18 +621,17 @@
 #  $(error $(call build-runtime-library,target,ndebug))
   $(eval $(call build-runtime-library,target,ndebug,shared,libart))
   $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvmti))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
   $(eval $(call build-runtime-library,target,debug,shared,libart))
   $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvm))
+  $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvmti))
 endif
 
 # Clear locally defined variables.
 LOCAL_PATH :=
 LIBART_COMMON_SRC_FILES :=
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
 LIBART_HOST_LDFLAGS :=
 LIBART_TARGET_LDFLAGS :=
 LIBART_TARGET_LDFLAGS_arm :=
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index ee31c58..a857976 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -63,104 +63,97 @@
 // Grab architecture specific constants.
 namespace arm {
 #include "arch/arm/asm_support_arm.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm
 
 namespace arm64 {
 #include "arch/arm64/asm_support_arm64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace arm64
 
 namespace mips {
 #include "arch/mips/asm_support_mips.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips
 
 namespace mips64 {
 #include "arch/mips64/asm_support_mips64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace mips64
 
 namespace x86 {
 #include "arch/x86/asm_support_x86.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86
 
 namespace x86_64 {
 #include "arch/x86_64/asm_support_x86_64.h"
-static constexpr size_t kFrameSizeSaveAllCalleeSave = FRAME_SIZE_SAVE_ALL_CALLEE_SAVE;
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE;
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-}
+static constexpr size_t kFrameSizeSaveAllCalleeSaves = FRAME_SIZE_SAVE_ALL_CALLEE_SAVES;
+#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+static constexpr size_t kFrameSizeSaveRefsOnly = FRAME_SIZE_SAVE_REFS_ONLY;
+#undef FRAME_SIZE_SAVE_REFS_ONLY
+static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARGS;
+#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
+static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
+#undef FRAME_SIZE_SAVE_EVERYTHING
+}  // namespace x86_64
 
 // Check architecture specific constants are sound.
-TEST_F(ArchTest, ARM) {
-  CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-
-TEST_F(ArchTest, ARM64) {
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kSaveAll, arm64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs,
-                 arm64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS) {
-  CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs,
-                 mips::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, MIPS64) {
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kSaveAll, mips64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsOnly, mips64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsAndArgs,
-                 mips64::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86) {
-  CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave);
-}
-
-TEST_F(ArchTest, X86_64) {
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave);
-  CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs,
-                 x86_64::kFrameSizeRefsAndArgsCalleeSave);
-}
+#define TEST_ARCH(Arch, arch)                             \
+  TEST_F(ArchTest, Arch) {                                \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveAllCalleeSaves,          \
+                   arch::kFrameSizeSaveAllCalleeSaves);   \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsOnly,                \
+                   arch::kFrameSizeSaveRefsOnly);         \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveRefsAndArgs,             \
+                   arch::kFrameSizeSaveRefsAndArgs);      \
+    CheckFrameSize(InstructionSet::k##Arch,               \
+                   Runtime::kSaveEverything,              \
+                   arch::kFrameSizeSaveEverything);       \
+  }
+TEST_ARCH(Arm, arm)
+TEST_ARCH(Arm64, arm64)
+TEST_ARCH(Mips, mips)
+TEST_ARCH(Mips64, mips64)
+TEST_ARCH(X86, x86)
+TEST_ARCH(X86_64, x86_64)
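For readability, TEST_ARCH(Arm, arm) expands to roughly the following (token pasting turns k##Arch into kArm):

TEST_F(ArchTest, Arm) {
  CheckFrameSize(InstructionSet::kArm,
                 Runtime::kSaveAllCalleeSaves,
                 arm::kFrameSizeSaveAllCalleeSaves);
  CheckFrameSize(InstructionSet::kArm,
                 Runtime::kSaveRefsOnly,
                 arm::kFrameSizeSaveRefsOnly);
  CheckFrameSize(InstructionSet::kArm,
                 Runtime::kSaveRefsAndArgs,
                 arm::kFrameSizeSaveRefsAndArgs);
  CheckFrameSize(InstructionSet::kArm,
                 Runtime::kSaveEverything,
                 arm::kFrameSizeSaveEverything);
}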
 
 }  // namespace art
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 44c7649..38ca76a 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -30,18 +30,17 @@
 .arch armv7-a
 .thumb
 
-// Macro to generate the value of Runtime::Current into rDest clobbering rTemp. As it uses labels
+// Macro to generate the value of Runtime::Current into rDest. As it uses labels
 // then the labels need to be unique. We bind these to the function name in the ENTRY macros.
-.macro RUNTIME_CURRENT name, num, rDest, rTemp
+.macro RUNTIME_CURRENT name, num, rDest
     .if .Lruntime_current\num\()_used
          .error
     .endif
     .set .Lruntime_current\num\()_used, 1
-    ldr \rDest, .Lgot_\name\()_\num               @ Load offset of the GOT.
-    ldr \rTemp, .Lruntime_instance_\name\()_\num  @ Load GOT offset of Runtime::instance_.
+    ldr \rDest, .Lruntime_instance_\name\()_\num  @ Load GOT_PREL offset of Runtime::instance_.
 .Lload_got_\name\()_\num\():
-    add \rDest, pc                                @ Fixup GOT address.
-    ldr \rDest, [\rDest, \rTemp]                  @ Load address of Runtime::instance_.
+    add \rDest, pc                                @ Fixup GOT_PREL address.
+    ldr \rDest, [\rDest]                          @ Load address of Runtime::instance_.
     ldr \rDest, [\rDest]                          @ Load Runtime::instance_.
 .endm
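A hedged C++ model of what this sequence computes (names are illustrative; the exact relocation arithmetic is handled by the assembler and linker): the literal word ends up holding a PC-relative offset to the GOT entry of Runtime::instance_, so a single register suffices, which is why RUNTIME_CURRENT no longer takes an rTemp argument.

#include <cstdint>

// pc_value is what "add rDest, pc" reads at that point (the address of the add plus 4
// in Thumb state); got_entry then holds the GOT slot, the first load yields
// &Runtime::instance_, and the second load yields the Runtime* stored there.
uintptr_t RuntimeCurrentModel(intptr_t pc_relative_literal, uintptr_t pc_value) {
  uintptr_t got_entry = pc_value + static_cast<uintptr_t>(pc_relative_literal);
  uintptr_t instance_address = *reinterpret_cast<uintptr_t*>(got_entry);
  return *reinterpret_cast<uintptr_t*>(instance_address);
}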
 
@@ -90,26 +89,20 @@
     DEF_ENTRY .arm, \name
 .endm
 
-// Terminate an ENTRY and generate GOT references.
+// Terminate an ENTRY and generate GOT_PREL references.
 .macro END name
      // Generate offsets of GOT and Runtime::instance_ used in RUNTIME_CURRENT.
      .if .Lruntime_current1_used
-         .Lgot_\name\()_1:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_1+4)
          .Lruntime_instance_\name\()_1:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_1+4)
      .endif
      .if .Lruntime_current2_used
-         .Lgot_\name\()_2:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_2+4)
          .Lruntime_instance_\name\()_2:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_2+4)
     .endif
      .if .Lruntime_current3_used
-         .Lgot_\name\()_3:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_3+4)
          .Lruntime_instance_\name\()_3:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_3+4)
     .endif
     // Remove the RUNTIME_CURRENTx macros so they get rebound in the next function entry.
     .purgem RUNTIME_CURRENT1
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 1fa566b..c03bcae 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,9 +19,10 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 112
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
+#define FRAME_SIZE_SAVE_EVERYTHING 192
 
 // Flag for enabling R4 optimization in arm runtime
 // #define ARM_R4_SUSPEND_FLAG
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f0e9ac5..492a12d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -27,9 +27,27 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
 
 // Used by soft float.
 // Single-precision FP arithmetics.
@@ -97,12 +115,43 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
+  qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
+  qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
+  qpoints->pReadBarrierMarkReg15 = nullptr;  // Cannot use register 15 (PC) to pass arguments.
+  // ARM has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index d81e0a9..befdd48 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/hex_dump.h"
 #include "globals.h"
@@ -34,7 +35,7 @@
 
 namespace art {
 
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_implicit_suspend();
 
@@ -107,8 +108,10 @@
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // ARM PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.  However the mapping table has
@@ -122,7 +125,10 @@
 
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
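A tiny sketch of the Thumb-bit adjustment used when building the return address (illustrative helper, not ART code): bit 0 of a branch target selects Thumb state on ARM, so the handler ORs in 1.

#include <cstdint>

uintptr_t ThumbReturnAddress(uintptr_t faulting_pc, uint32_t instr_size) {
  return (faulting_pc + instr_size) | 1u;  // bit 0 set => resume in Thumb state
}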
@@ -139,7 +145,8 @@
                                void* context) {
   // These are the instructions to check for.  The first one is the ldr r0,[r9,#xxx]
   // where xxx is the offset of the suspend trigger.
-  uint32_t checkinst1 = 0xf8d90000 + Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+  uint32_t checkinst1 = 0xf8d90000
+      + Thread::ThreadSuspendTriggerOffset<PointerSize::k32>().Int32Value();
   uint16_t checkinst2 = 0x6800;
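A short sketch of how checkinst1 is assembled (illustrative helper, not ART code): 0xf8d90000 is the Thumb-2 encoding of ldr r0, [r9, #0] (Rn = r9, Rt = r0, imm12 = 0), so adding the suspend-trigger offset in the low 12 bits yields the exact instruction word to match.

#include <cstdint>

constexpr uint32_t MakeSuspendCheckLdr(uint32_t trigger_offset) {
  // trigger_offset must fit in the 12-bit immediate field of the encoding.
  return 0xf8d90000u + trigger_offset;
}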
 
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index 51f992b..c3a5829 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -16,7 +16,7 @@
 
 #include "instruction_set_features_arm.h"
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
 #endif
@@ -166,7 +166,7 @@
   bool has_div = false;
   bool has_lpae = false;
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
   uint64_t hwcaps = getauxval(AT_HWCAP);
   LOG(INFO) << "hwcaps=" << hwcaps;
   if ((hwcaps & HWCAP_IDIVT) != 0) {
@@ -206,6 +206,7 @@
   struct sigaction sa, osa;
   sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
   sa.sa_sigaction = bad_divide_inst_handle;
+  sigemptyset(&sa.sa_mask);
   sigaction(SIGILL, &sa, &osa);
 
   bool has_div = false;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fa8c8f9..881bebe 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -42,30 +42,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
     SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     .cfi_adjust_cfa_offset 64
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
-    RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveAllCalleeSaves Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
 #endif
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
     push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
     .cfi_adjust_cfa_offset 28
     .cfi_rel_offset r5, 0
@@ -77,48 +78,19 @@
     .cfi_rel_offset lr, 24
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
-    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveRefsOnly Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
 #endif
 .endm
 
-    /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
-     * and preserves the value of rTemp2 at entry.
-     */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2 rTemp1, rTemp2
-    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
-    .cfi_adjust_cfa_offset 28
-    .cfi_rel_offset r5, 0
-    .cfi_rel_offset r6, 4
-    .cfi_rel_offset r7, 8
-    .cfi_rel_offset r8, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset lr, 24
-    sub sp, #4                                    @ bottom word will hold Method*
-    .cfi_adjust_cfa_offset 4
-    str \rTemp2, [sp, #0]                         @ save rTemp2
-    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
-    ldr \rTemp2, [sp, #0]                         @ restore rTemp2
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
-    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
-#endif
-.endm
-
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     add sp, #4               @ bottom word holds Method*
     .cfi_adjust_cfa_offset -4
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
@@ -132,16 +104,16 @@
     .cfi_adjust_cfa_offset -28
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bx  lr                   @ return
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
     .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
@@ -156,30 +128,30 @@
     .cfi_rel_offset lr, 36
     vpush {s0-s15}                     @ 16 words of float args.
     .cfi_adjust_cfa_offset 64
-    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
+    sub sp, #8                         @ 2 words of space, alignment padding and Method*
     .cfi_adjust_cfa_offset 8
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
 #endif
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
-     @ rTemp1 is kRefsAndArgs Method*.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
+    @ Load kSaveRefsAndArgs Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
+    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     add  sp, #8                      @ rewind sp
     .cfi_adjust_cfa_offset -8
     vpop {s0-s15}
@@ -198,6 +170,65 @@
     .cfi_adjust_cfa_offset -40
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME rTemp
+    push {r0-r12, lr}                   @ 14 words of callee saves and args.
+    .cfi_adjust_cfa_offset 56
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+    .cfi_rel_offset lr, 52
+    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
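A quick cross-check of the guard above, expressed as a C++ static_assert (editorial illustration): 14 core registers, 16 double registers and 2 words of padding/Method* add up to the declared frame size.

// 14 * 4 bytes of core registers + 16 * 8 bytes of doubles + 2 * 4 bytes of padding/Method*.
static_assert(14 * 4 + 16 * 8 + 2 * 4 == 192, "must match FRAME_SIZE_SAVE_EVERYTHING on ARM");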
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    add  sp, #8                         @ rewind sp
+    .cfi_adjust_cfa_offset -8
+    vpop {d0-d15}
+    .cfi_adjust_cfa_offset -128
+    pop {r0-r12, lr}                    @ 14 words of callee saves
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r9
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_restore r12
+    .cfi_restore lr
+    .cfi_adjust_cfa_offset -56
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -217,7 +248,7 @@
 .macro DELIVER_PENDING_EXCEPTION
     .fnend
     .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1    @ save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
     mov    r0, r9                              @ pass Thread::Current
     b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
@@ -225,7 +256,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r0, r1 // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
     mov r0, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -234,7 +265,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1, r2  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
     mov r1, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -243,7 +274,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -275,11 +306,11 @@
 .macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -287,11 +318,11 @@
 .macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -299,14 +330,14 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12  @ save callee saves in case of GC
-    ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
+    ldr    r3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     \return
 END \name
 .endm
@@ -323,6 +354,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -334,6 +370,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
@@ -360,12 +402,12 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
     mov    r2, r9                         @ pass Thread::Current
     mov    r3, sp
     bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
     mov    r12, r1                        @ save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
     bx     r12                            @ tail call to target
 1:
@@ -539,7 +581,7 @@
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cbnz   r3, .Lnot_unlocked         @ already thin locked
     @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
     orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
@@ -555,9 +597,9 @@
     cbnz   r2, .Lslow_lock            @ lock word and self thread id's match -> recursive lock
                                       @ else contention, go to slow path
     mov    r3, r1                     @ copy the lock word to check count overflow.
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
     add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
-    lsr    r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
     cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
     add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
     strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
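A hedged C++ restatement of the overflow test above, with the lock-word constants left as parameters since their concrete values come from the generated assembly headers:

#include <cstdint>

// Mirrors the asm: mask off the GC-state bits, add one thin-lock count, and treat any
// bits that land at or above the GC-state shift as an overflow that must go slow path.
bool ThinLockCountOverflows(uint32_t lock_word,
                            uint32_t gc_state_mask_shifted_toggled,
                            uint32_t gc_state_shift,
                            uint32_t count_one) {
  uint32_t without_gc_bits = lock_word & gc_state_mask_shifted_toggled;
  uint32_t bumped = without_gc_bits + count_one;
  return (bumped >> gc_state_shift) != 0;
}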
@@ -566,19 +608,19 @@
 .Llock_strex_fail:
     b      .Lretry_lock               @ retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
 
 ENTRY art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object_no_inline
@@ -600,17 +642,17 @@
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     mov    r3, r1                     @ copy lock word to check thread id equality
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r3, r3                     @ zero top 16 bits
     cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
     mov    r3, r1                     @ copy lock word to detect transition to unlocked
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
     cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked
     mov    r3, r1
-    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
+    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
@@ -632,20 +674,20 @@
     b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    SETUP_SAVE_REFS_ONLY_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object
 
 ENTRY art_quick_unlock_object_no_inline
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    SETUP_SAVE_REFS_ONLY_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object_no_inline
@@ -677,7 +719,7 @@
     .cfi_restore r0
     .cfi_restore r1
     .cfi_restore lr
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   artThrowClassCastException  @ (Class*, Class*, Thread*)
     bkpt
@@ -691,6 +733,12 @@
     .endif
 .endm
 
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+    str \rReg, [sp, #\offset]       @ save rReg
+    .cfi_rel_offset \rReg, \offset
+.endm
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -813,7 +861,7 @@
 .Lthrow_array_store_exception:
     pop {r0-r2, lr}
     /* No need to repeat restore cfi directives, the ones above apply here. */
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3, ip
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
     mov r1, r2
     mov r2, r9                     @ pass Thread::Current
     b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
@@ -824,10 +872,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
     mov    r1, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -836,10 +884,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -848,11 +896,11 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -861,13 +909,13 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2  r12, r3  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -890,12 +938,12 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -916,12 +964,12 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
-    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
+    ldr    r2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz   r2, 1f                        @ success if no exception pending
     bx     lr                            @ return on success
 1:
@@ -941,15 +989,15 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12   @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_static
@@ -966,9 +1014,9 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
-    ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
+    ldr    r12, [sp, #FRAME_SIZE_SAVE_REFS_ONLY]  @ pass referrer
     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 12
     str    r12, [sp, #-4]!               @ expand the frame and pass the referrer
@@ -976,17 +1024,43 @@
     bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Method* referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
+    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
 END art_quick_set64_instance
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string; this stub will
+     * check the dex cache for a matching string (the fast path), and if not found,
+     * it will allocate a String and deliver an exception on error.
+     * On success the String is returned. R0 holds the string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr    r1, [sp]                                              @ load referrer
+    ldr    r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET]          @ load declaring class
+    ldr    r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]   @ load string dex cache
+    ubfx   r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS
+    add    r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
+    ldrd   r2, r3, [r1]                                    @ load index into r3 and pointer into r2
+    cmp    r0, r3
+    bne    .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    ldr    r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst    r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq    .Lart_quick_resolve_string_slow_path
+#endif
+    mov    r0, r2
+    bx     lr
+
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2                                @ save callee saves in case of GC
+    mov    r1, r9                                                @ pass Thread::Current
+    mov    r3, sp
+    bl     artResolveStringFromCode                              @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_resolve_string
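
As a reading aid, here is a rough C++ rendering of the fast path the new art_quick_resolve_string stub implements. The struct shape, hash-bit count and mark-bit constant are illustrative assumptions for the STRING_DEX_CACHE_* and LOCK_WORD_* asm constants, not ART's actual definitions.

#include <cstdint>

struct Object { uint32_t lock_word; };
struct StringDexCacheEntry {   // the pair loaded with ldrd into r2/r3
  Object* string;              // r2: cached String pointer
  uint32_t string_idx;         // r3: dex string index stored next to it
};

constexpr uint32_t kStringDexCacheHashBits = 10;        // assumed STRING_DEX_CACHE_HASH_BITS
constexpr uint32_t kLockWordMarkBitShifted = 1u << 29;  // assumed LOCK_WORD_MARK_BIT_MASK_SHIFTED

// Returns the cached String, or nullptr when the stub would branch to
// .Lart_quick_resolve_string_slow_path and call artResolveStringFromCode.
Object* ResolveStringFastPath(StringDexCacheEntry* dex_cache_strings,
                              uint32_t string_idx,
                              bool use_read_barrier) {
  StringDexCacheEntry& entry =
      dex_cache_strings[string_idx & ((1u << kStringDexCacheHashBits) - 1u)];  // ubfx + add
  if (entry.string_idx != string_idx) {
    return nullptr;                      // cmp r0, r3 / bne: cache miss
  }
  if (use_read_barrier && (entry.string->lock_word & kLockWordMarkBitShifted) == 0) {
    return nullptr;                      // object not yet marked: take the slow path
  }
  return entry.string;                   // mov r0, r2 / bx lr
}
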
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
@@ -1092,10 +1166,10 @@
     bx     lr
 
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
@@ -1177,10 +1251,10 @@
     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab
 
@@ -1198,9 +1272,15 @@
     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
                                                               // Read barrier for class load.
     ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    cbz    r2, .Lart_quick_alloc_object_region_tlab_slow_path  // Null check for loading lock word.
+    // Check lock word for mark bit, if marked do the allocation.
+    ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    ands r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    bne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
                                                               // The read barrier slow path. Mark
                                                               // the class.
@@ -1212,10 +1292,10 @@
     pop    {r0, r1, r3, lr}
     b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_region_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_region_tlab
 
@@ -1225,24 +1305,25 @@
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
 #ifdef ARM_R4_SUSPEND_FLAG
-    ldrh   r0, [rSELF, #THREAD_FLAGS_OFFSET]
-    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
-    cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
-    bx     lr                                 @ return if suspend_count_ == 0
+    ldrh   rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET]
+    cbnz   rSUSPEND, 1f                         @ check Thread::Current()->suspend_count_ == 0
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
+    bx     lr                                   @ return if suspend_count_ == 0
 1:
+    mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
 #endif
+    SETUP_SAVE_EVERYTHING_FRAME r0              @ save everything for GC stack crawl
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for GC stack crawl
-    @ TODO: save FPRs to enable access in the debugger?
-    bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    bl     artTestSuspendFromCode               @ (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    bx     lr
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
     bl     artTestSuspendFromCode             @ (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
     /*
@@ -1252,15 +1333,15 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1303,17 +1384,17 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
     cbz     r0, 1f                 @ is code pointer null? goto exception
     mov     r12, r0
     ldr  r0, [sp, #0]              @ load resolved method in r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bx      r12                    @ tail-call into actual code
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1321,7 +1402,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
 
     // Save rSELF
     mov r11, rSELF
@@ -1388,16 +1469,16 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     // store into fpr, for when it's a fpr return...
     vmov d0, r0, r1
     bx lr      // ret
     // Undo the unwinding information from above since it doesn't apply below.
     .cfi_def_cfa_register r10
-    .cfi_adjust_cfa_offset FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
 
 .Lexception_in_native:
     ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -1408,15 +1489,15 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1, r2
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
     mov     r1, r9                 @ pass Thread::Current
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     // Tear down the callee-save frame. Skip arg registers.
-    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
@@ -1431,22 +1512,22 @@
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
     @ Make stack crawlable and clobber r2 and r3 (post saving)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
-    @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
+    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
     mov   r3, lr         @ pass LR
     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
 @ Deliberate fall-through into art_quick_instrumentation_exit.
     .type art_quick_instrumentation_exit, #function
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ set up frame knowing r2 and r3 must be dead on exit
+    SETUP_SAVE_REFS_ONLY_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
     mov   r12, sp        @ remember bottom of caller's frame
     push  {r0-r1}        @ save return value
     .cfi_adjust_cfa_offset 8
@@ -1485,7 +1566,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9         @ Set up args.
     blx    artDeoptimize  @ artDeoptimize(Thread*)
 END art_quick_deoptimize
@@ -1496,7 +1577,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
     mov    r0, r9                         @ Set up args.
     blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
 END art_quick_deoptimize_from_compiled_code
@@ -1704,145 +1785,6 @@
     pop {r4, r10-r11, pc}
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * Requires rARG0/rARG1 to have been previously checked for null.  Will
-     * return negative if this's string is < comp, 0 if they are the
-     * same and positive if >.
-     *
-     * On entry:
-     *    r0:   this object pointer
-     *    r1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    r2, r0         @ this to r2, opening up r0 for return value
-    sub    r0, r2, r1     @ Same?
-    cbnz   r0,1f
-    bx     lr
-1:                        @ Same strings, return.
-
-    push {r4, r7-r12, lr} @ 8 words - keep alignment
-    .cfi_adjust_cfa_offset 32
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r7, 4
-    .cfi_rel_offset r8, 8
-    .cfi_rel_offset r9, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset r12, 24
-    .cfi_rel_offset lr, 28
-
-    ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    add    r2, #MIRROR_STRING_VALUE_OFFSET
-    add    r1, #MIRROR_STRING_VALUE_OFFSET
-
-    /*
-     * At this point, we have:
-     *    value:  r2/r1
-     *    offset: r4/r9
-     *    count:  r7/r10
-     * We're going to compute
-     *    r11 <- countDiff
-     *    r10 <- minCount
-     */
-     subs  r11, r7, r10
-     it    ls
-     movls r10, r7
-
-     /*
-      * Note: data pointers point to previous element so we can use pre-index
-      * mode with base writeback.
-      */
-     subs  r2, #2   @ offset to contents[-1]
-     subs  r1, #2   @ offset to contents[-1]
-
-     /*
-      * At this point we have:
-      *   r2: *this string data
-      *   r1: *comp string data
-      *   r10: iteration count for comparison
-      *   r11: value to return if the first part of the string is equal
-      *   r0: reserved for result
-      *   r3, r4, r7, r8, r9, r12 available for loading string data
-      */
-
-    subs  r10, #2
-    blt   .Ldo_remainder2
-
-      /*
-       * Unroll the first two checks so we can quickly catch early mismatch
-       * on long strings (but preserve incoming alignment)
-       */
-
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    bne   .Ldone
-    cmp   r10, #28
-    bgt   .Ldo_memcmp16
-    subs  r10, #3
-    blt   .Ldo_remainder
-
-.Lloopback_triple:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    ldrh  r9, [r2, #2]!
-    ldrh  r12,[r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    it    eq
-    subseq  r0, r9, r12
-    bne   .Ldone
-    subs  r10, #3
-    bge   .Lloopback_triple
-
-.Ldo_remainder:
-    adds  r10, #3
-    beq   .Lreturn_diff
-
-.Lloopback_single:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    subs  r0, r3, r4
-    bne   .Ldone
-    subs  r10, #1
-    bne   .Lloopback_single
-
-.Lreturn_diff:
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-.Ldo_remainder2:
-    adds  r10, #2
-    bne   .Lloopback_single
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-    /* Long string case */
-.Ldo_memcmp16:
-    mov   r7, r11
-    add   r0, r2, #2
-    add   r1, r1, #2
-    mov   r2, r10
-    bl    __memcmp16
-    cmp   r0, #0
-    it    eq
-    moveq r0, r7
-.Ldone:
-    pop   {r4, r7-r12, pc}
-END art_quick_string_compareto
-
     /* Assembly routines used to handle ABI differences. */
 
     /* double fmod(double a, double b) */
@@ -1876,7 +1818,7 @@
     add   sp, #4
     .cfi_adjust_cfa_offset -4
     pop   {pc}
-END art_quick_fmod
+END art_quick_fmodf
 
     /* int64_t art_d2l(double d) */
     .extern art_d2l
@@ -1906,3 +1848,104 @@
     .cfi_adjust_cfa_offset -4
     pop   {pc}
 END art_quick_l2f
+
+.macro CONDITIONAL_CBZ reg, reg_if, dest
+.ifc \reg, \reg_if
+    cbz \reg, \dest
+.endif
+.endm
+
+.macro CONDITIONAL_CMPBZ reg, reg_if, dest
+.ifc \reg, \reg_if
+    cmp \reg, #0
+    beq \dest
+.endif
+.endm
+
+// Use CBZ if the register is in {r0, ..., r7}; otherwise compare and branch.
+.macro SMART_CBZ reg, dest
+    CONDITIONAL_CBZ \reg, r0, \dest
+    CONDITIONAL_CBZ \reg, r1, \dest
+    CONDITIONAL_CBZ \reg, r2, \dest
+    CONDITIONAL_CBZ \reg, r3, \dest
+    CONDITIONAL_CBZ \reg, r4, \dest
+    CONDITIONAL_CBZ \reg, r5, \dest
+    CONDITIONAL_CBZ \reg, r6, \dest
+    CONDITIONAL_CBZ \reg, r7, \dest
+    CONDITIONAL_CMPBZ \reg, r8, \dest
+    CONDITIONAL_CMPBZ \reg, r9, \dest
+    CONDITIONAL_CMPBZ \reg, r10, \dest
+    CONDITIONAL_CMPBZ \reg, r11, \dest
+    CONDITIONAL_CMPBZ \reg, r12, \dest
+    CONDITIONAL_CMPBZ \reg, r13, \dest
+    CONDITIONAL_CMPBZ \reg, r14, \dest
+    CONDITIONAL_CMPBZ \reg, r15, \dest
+.endm
+
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     *
+     * If `reg` is different from `r0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `reg` is used to pass the (sole) argument of this
+     *   function (instead of R0);
+     * - register `reg` is used to return the result of this function
+     *   (instead of R0);
+     * - R0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    // Null check so that we can load the lock word.
+    SMART_CBZ \reg, .Lret_rb_\name
+    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
+    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq .Lslow_rb_\name
+    // Already marked, return right away.
+    bx lr
+
+.Lslow_rb_\name:
+    push  {r0-r5, r9, lr}               @ save return address and core caller-save registers
+                                        @ also save callee save r5 for 16 byte alignment
+    .cfi_adjust_cfa_offset 32
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r9, 24
+    .cfi_rel_offset lr, 28
+    vpush {s0-s15}                      @ save floating-point caller-save registers
+    .cfi_adjust_cfa_offset 64
+
+    .ifnc \reg, r0
+      mov   r0, \reg                    @ pass arg1 - obj from `reg`
+    .endif
+    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
+    mov ip, r0                          @ Save result in IP
+    vpop {s0-s15}                       @ restore floating-point registers
+    .cfi_adjust_cfa_offset -64
+    pop   {r0-r5, r9, lr}               @ restore caller-save registers
+    mov \reg, ip                        @ copy result to reg
+.Lret_rb_\name:
+    bx lr
+END \name
+.endm
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
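
Conceptually, each READ_BARRIER_MARK_REG stub generated above reduces to the check below; the register spills and the per-register calling convention have no C++ equivalent, and the mark-bit constant is again an assumption, so treat this only as a sketch.

#include <cstdint>

struct Object { uint32_t lock_word; };

constexpr uint32_t kLockWordMarkBitShifted = 1u << 29;  // assumed LOCK_WORD_MARK_BIT_MASK_SHIFTED

// Stand-in for the runtime marking call (artReadBarrierMark in the stub).
Object* SlowPathMark(Object* ref) { return ref; }

Object* ReadBarrierMark(Object* ref) {
  if (ref == nullptr) {
    return ref;                                       // SMART_CBZ: nothing to mark
  }
  if (ref->lock_word & kLockWordMarkBitShifted) {
    return ref;                                       // already marked in this GC cycle
  }
  return SlowPathMark(ref);                           // spill caller-saves and call the runtime
}
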
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 5580ee4..4b23c77 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -34,6 +34,9 @@
     (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
 static constexpr uint32_t kArmCalleeSaveAllSpills =
     (1 << art::arm::R4) | (1 << art::arm::R9);
+static constexpr uint32_t kArmCalleeSaveEverythingSpills =
+    (1 << art::arm::R0) | (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3) |
+    (1 << art::arm::R4) | (1 << art::arm::R9) | (1 << art::arm::R12);
 
 static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
@@ -47,23 +50,27 @@
     (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
     (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
     (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
+static constexpr uint32_t kArmCalleeSaveFpEverythingSpills =
+    kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(ArmCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(ArmCalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kArmPointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kArmPointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
@@ -75,17 +82,17 @@
 constexpr size_t ArmCalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
          (POPCOUNT(ArmCalleeSaveCoreSpills(type)) +
-          POPCOUNT(ArmCalleeSaveFpSpills(type))) * kArmPointerSize;
+          POPCOUNT(ArmCalleeSaveFpSpills(type))) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
-         POPCOUNT(ArmCalleeSaveCoreSpills(type)) * kArmPointerSize;
+         POPCOUNT(ArmCalleeSaveCoreSpills(type)) * static_cast<size_t>(kArmPointerSize);
 }
 
 constexpr size_t ArmCalleeSaveLrOffset(Runtime::CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
-      POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * kArmPointerSize;
+      POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * static_cast<size_t>(kArmPointerSize);
 }
 
 }  // namespace arm
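
The static_cast<size_t> additions above follow from kArmPointerSize becoming a PointerSize enum class value (see the PointerSize::k32 uses in thread_arm.cc below); the frame-size arithmetic itself is unchanged. A self-contained restatement with purely illustrative spill counts, not the real ART spill masks:

#include <cstddef>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };  // why the explicit cast is now needed

constexpr size_t kStackAlignment = 16;  // assumed ART stack alignment

constexpr size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

constexpr size_t FrameSize(size_t gpr_spills, size_t fpr_spills) {
  return RoundUp((gpr_spills + fpr_spills + 1 /* ArtMethod* slot */) *
                     static_cast<size_t>(PointerSize::k32),
                 kStackAlignment);
}

// Example numbers only: 7 core spills and no FP spills give a 32-byte frame;
// adding 16 FP spills grows it to 96 bytes.
static_assert(FrameSize(7, 0) == 32, "illustrative refs-only style frame");
static_assert(FrameSize(7, 16) == 96, "illustrative frame with FP spills");
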
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 2a551a8..ff4f81b 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -17,15 +17,16 @@
 #include "thread.h"
 
 #include "asm_support_arm.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 989ecc6..5e7b51d 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 176
+#define FRAME_SIZE_SAVE_REFS_ONLY 96
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 224
+#define FRAME_SIZE_SAVE_EVERYTHING 512
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 1618ced..55b09c3 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -27,8 +27,46 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
+
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg16(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg23(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg24(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg25(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg26(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg27(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
@@ -80,12 +118,51 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM64 StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  // ARM64 is the architecture with the largest number of core
+  // registers (32) that supports the read barrier configuration.
+  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
+  // arguments, only define ReadBarrierMarkRegX entrypoints for the
+  // first 30 registers.  This limitation is not a problem on other
+  // supported architectures (ARM, x86 and x86-64) either, as they
+// have fewer core registers (resp. 16, 8 and 16).  (We may have to
+  // revise that design choice if read barrier support is added for
+  // MIPS and/or MIPS64.)
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
+  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
+  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
+  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
+  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
+  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
+  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
+  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
+  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
+  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
+  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
+  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
+  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
+  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 3e9ad0d..6724d6d 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
@@ -29,7 +30,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 //
@@ -84,8 +85,10 @@
   *out_return_pc = sc->pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -95,7 +98,10 @@
 
   sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
 
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -112,7 +118,8 @@
                                void* context) {
   // These are the instructions to check for.  The first one is the ldr x0,[r18,#xxx]
   // where xxx is the offset of the suspend trigger.
-  uint32_t checkinst1 = 0xf9400240 | (Thread::ThreadSuspendTriggerOffset<8>().Int32Value() << 7);
+  uint32_t checkinst1 = 0xf9400240 |
+      (Thread::ThreadSuspendTriggerOffset<PointerSize::k64>().Int32Value() << 7);
   uint32_t checkinst2 = 0xf9400000;
 
   struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
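
In short, the handler now rewrites the signal context so that resuming lands in the new *_from_signal entrypoint with the faulting address as its argument. A minimal sketch, with FakeSigContext standing in for the aarch64 sigcontext and the stub address passed in explicitly:

#include <cstdint>

struct FakeSigContext {
  uint64_t regs[31];  // x0..x30
  uint64_t pc;
};

// After this rewrite, "returning" from the signal handler enters
// art_quick_throw_null_pointer_exception_from_signal with the faulting address in x0
// and LR pointing just past the faulting instruction (the gc map location).
void RedirectToNullPointerThrowStub(FakeSigContext* sc,
                                    uintptr_t fault_address,
                                    uintptr_t throw_stub_entry_pc) {
  sc->regs[30] = sc->pc + 4;
  sc->regs[0]  = fault_address;
  sc->pc       = throw_stub_entry_pc;
}
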
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 209e7f0..202846a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -21,25 +21,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 176)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -74,7 +74,7 @@
     .cfi_rel_offset x29, 160
     .cfi_rel_offset x30, 168
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
     str xIP0, [sp]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -83,25 +83,25 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
-    // Loads appropriate callee-save-method.
-    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
 
     sub sp, sp, #96
     .cfi_adjust_cfa_offset 96
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
 #endif
 
     // GP callee-saves.
@@ -126,7 +126,7 @@
     .cfi_rel_offset x29, 80
     .cfi_rel_offset x30, 88
 
-    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly].
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
     stp xIP0, x20, [sp]
     .cfi_rel_offset x20, 8
 
@@ -136,7 +136,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     // Callee-saves.
     ldr x20, [sp, #8]
     .cfi_restore x20
@@ -165,24 +165,24 @@
     .cfi_adjust_cfa_offset -96
 .endm
 
-.macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro POP_SAVE_REFS_ONLY_FRAME
     add sp, sp, #96
     .cfi_adjust_cfa_offset - 96
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     ret
 .endm
 
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sub sp, sp, #224
     .cfi_adjust_cfa_offset 224
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 224)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
 #endif
 
     // Stack alignment filler [sp, #8].
@@ -235,30 +235,31 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      *
      * TODO This is probably too conservative - saving FP & LR.
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
     ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
 
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
 
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
@@ -266,7 +267,7 @@
 .endm
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     // FP args.
     ldp d0, d1, [sp, #16]
     ldp d2, d3, [sp, #32]
@@ -316,6 +317,208 @@
     .cfi_adjust_cfa_offset -224
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    sub sp, sp, #512
+    .cfi_adjust_cfa_offset 512
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
+#endif
+
+    // Save FP registers.
+    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
+    str d0,       [sp, #8]
+    stp d1, d2,   [sp, #16]
+    stp d3, d4,   [sp, #32]
+    stp d5, d6,   [sp, #48]
+    stp d7, d8,   [sp, #64]
+    stp d9, d10,  [sp, #80]
+    stp d11, d12, [sp, #96]
+    stp d13, d14, [sp, #112]
+    stp d15, d16, [sp, #128]
+    stp d17, d18, [sp, #144]
+    stp d19, d20, [sp, #160]
+    stp d21, d22, [sp, #176]
+    stp d23, d24, [sp, #192]
+    stp d25, d26, [sp, #208]
+    stp d27, d28, [sp, #224]
+    stp d29, d30, [sp, #240]
+    str d31,      [sp, #256]
+
+    // Save core registers.
+    str x0,       [sp, #264]
+    .cfi_rel_offset x0, 264
+
+    stp x1, x2,   [sp, #272]
+    .cfi_rel_offset x1, 272
+    .cfi_rel_offset x2, 280
+
+    stp x3, x4,   [sp, #288]
+    .cfi_rel_offset x3, 288
+    .cfi_rel_offset x4, 296
+
+    stp x5, x6,   [sp, #304]
+    .cfi_rel_offset x5, 304
+    .cfi_rel_offset x6, 312
+
+    stp x7, x8,   [sp, #320]
+    .cfi_rel_offset x7, 320
+    .cfi_rel_offset x8, 328
+
+    stp x9, x10,  [sp, #336]
+    .cfi_rel_offset x9, 336
+    .cfi_rel_offset x10, 344
+
+    stp x11, x12, [sp, #352]
+    .cfi_rel_offset x11, 352
+    .cfi_rel_offset x12, 360
+
+    stp x13, x14, [sp, #368]
+    .cfi_rel_offset x13, 368
+    .cfi_rel_offset x14, 376
+
+    stp x15, x16, [sp, #384]
+    .cfi_rel_offset x15, 384
+    .cfi_rel_offset x16, 392
+
+    stp x17, x18, [sp, #400]
+    .cfi_rel_offset x17, 400
+    .cfi_rel_offset x18, 408
+
+    stp x19, x20, [sp, #416]
+    .cfi_rel_offset x19, 416
+    .cfi_rel_offset x20, 424
+
+    stp x21, x22, [sp, #432]
+    .cfi_rel_offset x21, 432
+    .cfi_rel_offset x22, 440
+
+    stp x23, x24, [sp, #448]
+    .cfi_rel_offset x23, 448
+    .cfi_rel_offset x24, 456
+
+    stp x25, x26, [sp, #464]
+    .cfi_rel_offset x25, 464
+    .cfi_rel_offset x26, 472
+
+    stp x27, x28, [sp, #480]
+    .cfi_rel_offset x27, 480
+    .cfi_rel_offset x28, 488
+
+    stp x29, xLR, [sp, #496]
+    .cfi_rel_offset x29, 496
+    .cfi_rel_offset x30, 504
+
+    // art::Runtime** xIP0 = &art::Runtime::instance_
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
+
+    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
+    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
+    str xIP0, [sp]
+    // Place sp in Thread::Current()->top_quick_frame.
+    mov xIP0, sp
+    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
+    ldr d0,       [sp, #8]
+    ldp d1, d2,   [sp, #16]
+    ldp d3, d4,   [sp, #32]
+    ldp d5, d6,   [sp, #48]
+    ldp d7, d8,   [sp, #64]
+    ldp d9, d10,  [sp, #80]
+    ldp d11, d12, [sp, #96]
+    ldp d13, d14, [sp, #112]
+    ldp d15, d16, [sp, #128]
+    ldp d17, d18, [sp, #144]
+    ldp d19, d20, [sp, #160]
+    ldp d21, d22, [sp, #176]
+    ldp d23, d24, [sp, #192]
+    ldp d25, d26, [sp, #208]
+    ldp d27, d28, [sp, #224]
+    ldp d29, d30, [sp, #240]
+    ldr d31,      [sp, #256]
+
+    // Restore core registers.
+    ldr x0,       [sp, #264]
+    .cfi_restore x0
+
+    ldp x1, x2,   [sp, #272]
+    .cfi_restore x1
+    .cfi_restore x2
+
+    ldp x3, x4,   [sp, #288]
+    .cfi_restore x3
+    .cfi_restore x4
+
+    ldp x5, x6,   [sp, #304]
+    .cfi_restore x5
+    .cfi_restore x6
+
+    ldp x7, x8,   [sp, #320]
+    .cfi_restore x7
+    .cfi_restore x8
+
+    ldp x9, x10,  [sp, #336]
+    .cfi_restore x9
+    .cfi_restore x10
+
+    ldp x11, x12, [sp, #352]
+    .cfi_restore x11
+    .cfi_restore x12
+
+    ldp x13, x14, [sp, #368]
+    .cfi_restore x13
+    .cfi_restore x14
+
+    ldp x15, x16, [sp, #384]
+    .cfi_restore x15
+    .cfi_restore x16
+
+    ldp x17, x18, [sp, #400]
+    .cfi_restore x17
+    .cfi_restore x18
+
+    ldp x19, x20, [sp, #416]
+    .cfi_restore x19
+    .cfi_restore x20
+
+    ldp x21, x22, [sp, #432]
+    .cfi_restore x21
+    .cfi_restore x22
+
+    ldp x23, x24, [sp, #448]
+    .cfi_restore x23
+    .cfi_restore x24
+
+    ldp x25, x26, [sp, #464]
+    .cfi_restore x25
+    .cfi_restore x26
+
+    ldp x27, x28, [sp, #480]
+    .cfi_restore x27
+    .cfi_restore x28
+
+    ldp x29, xLR, [sp, #496]
+    .cfi_restore x29
+    .cfi_restore x30
+
+    add sp, sp, #512
+    .cfi_adjust_cfa_offset -512
+.endm
+
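
As a sanity check on the 512-byte kSaveEverything frame laid out above: one ArtMethod* slot plus all 32 FP registers plus all 31 core registers (x0-x30), each in an 8-byte slot, accounts for the whole frame. In C++:

#include <cstddef>

constexpr size_t kSlot = 8;  // every saved register (Dn or Xn) occupies one 8-byte slot

static_assert(kSlot            /* ArtMethod* at [sp, #0]    */
              + 32 * kSlot     /* d0-d31 at [sp, #8..#263]   */
              + 31 * kSlot     /* x0-x30 at [sp, #264..#511] */
              == 512,
              "matches FRAME_SIZE_SAVE_EVERYTHING on ARM64");
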
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -333,7 +536,7 @@
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x0, xSELF
 
     // Point of no return.
@@ -368,7 +571,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x0, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(Thread*)
 END \c_name
@@ -377,7 +580,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
     mov x1, xSELF                     // pass Thread::Current.
     b   \cxx_name                     // \cxx_name(arg, Thread*).
     brk 0
@@ -387,7 +590,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
     brk 0
@@ -406,6 +609,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -417,6 +625,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
@@ -447,7 +661,7 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
@@ -455,7 +669,7 @@
     mov    x3, sp
     bl     \cxx_name                      // (method_idx, this, Thread*, SP)
     mov    xIP0, x1                       // save Method*->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
     br     xIP0                           // tail call to target
 1:
@@ -1079,7 +1293,7 @@
     ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
     ldxr   w1, [x4]
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cbnz   w3, .Lnot_unlocked         // already thin locked
     // unlocked case - x1: original lock word that's zero except for the read barrier bits.
     orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
@@ -1095,9 +1309,9 @@
     cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                       // else contention, go to slow path
     mov    x3, x1                     // copy the lock word to check count overflow.
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits.
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
     add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
-    lsr    w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  // if either of the upper two bits (28-29) are set, we overflowed.
+    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
     cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
     add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
     stxr   w3, w2, [x4]
@@ -1106,18 +1320,18 @@
 .Llock_stxr_fail:
     b      .Lretry_lock               // retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
 
 ENTRY art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object_no_inline
 
@@ -1141,17 +1355,17 @@
     cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
     ldr    w2, [xSELF, #THREAD_ID_OFFSET]
     mov    x3, x1                     // copy lock word to check thread id equality
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
     uxth   w3, w3                     // zero top 16 bits
     cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
     mov    x3, x1                     // copy lock word to detect transition to unlocked
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  // zero the read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
     cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
     // transition to unlocked
     mov    x3, x1
-    and    w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK  // w3: zero except for the preserved read barrier bits
+    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved gc bits
     dmb    ish                        // full (LoadStore|StoreStore) memory barrier
 #ifndef USE_READ_BARRIER
     str    w3, [x4]
@@ -1172,18 +1386,18 @@
 .Lunlock_stxr_fail:
     b      .Lretry_unlock               // retry
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
 ENTRY art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object_no_inline
 
@@ -1228,7 +1442,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     b artThrowClassCastException      // (Class*, Class*, Thread*)
     brk 0                             // We should not return here...
@@ -1242,6 +1456,22 @@
     .endif
 .endm
 
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+    .ifc \xReg1, \xExclude
+        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
+    .else
+        .ifc \xReg2, \xExclude
+            ldr \xReg1, [sp, #\offset]          // restore xReg1
+        .else
+            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
+        .endif
+    .endif
+    .cfi_restore \xReg1
+    .cfi_restore \xReg2
+.endm
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -1249,8 +1479,18 @@
      * name mismatch between instructions. This macro uses the lower 32b of register when possible.
      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
      */
-.macro READ_BARRIER xDest, wDest, xObj, offset
+.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
 #ifdef USE_READ_BARRIER
+#ifdef USE_BAKER_READ_BARRIER
+    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
+    // False dependency to avoid needing load/load fence.
+    add \xObj, \xObj, \xTemp, lsr #32
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+    b .Lrb_exit\number
+#endif
+.Lrb_slowpath\number:
     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
     stp x0, x1, [sp, #-48]!
     .cfi_adjust_cfa_offset 48
@@ -1284,6 +1524,7 @@
     .cfi_restore x30
     add sp, sp, #48
     .cfi_adjust_cfa_offset -48
+.Lrb_exit\number:
 #else
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
     UNPOISON_HEAP_REF \wDest
@@ -1322,12 +1563,12 @@
 #endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x3
-    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x4
-    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
-                                                         // This also zero-extends to x3
+    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
+                                                                    // This also zero-extends to x4
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
@@ -1389,7 +1630,7 @@
     .cfi_restore x1
     .cfi_adjust_cfa_offset -32
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov x1, x2                    // Pass value.
     mov x2, xSELF                 // Pass Thread::Current.
     b artThrowArrayStoreException // (Object*, Object*, Thread*).
@@ -1400,10 +1641,10 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1412,10 +1653,10 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1424,10 +1665,10 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1436,10 +1677,10 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint                //
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
     DELIVER_PENDING_EXCEPTION
 END \name
@@ -1449,11 +1690,11 @@
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1461,11 +1702,11 @@
 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1473,11 +1714,11 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     \return
 END \name
 .endm
@@ -1535,24 +1776,74 @@
 // This is separated out as the argument order is different.
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
                                       // x2 contains the parameter
     mov    x3, xSELF                  // pass Thread::Current
     bl     artSet64StaticFromCode
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_set64_static
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. w0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string. This stub will
+     * check the dex cache for a matching string (the fast path), and if not found,
+     * it will allocate a String and deliver an exception on error.
+     * On success the String is returned. W0 holds the string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr   x1, [sp]                                               // load referrer
+    ldr   w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET]           // load declaring class
+    ldr   x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]    // load string dex cache
+    and   x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE               // get masked string index into x2
+    ldr   x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x2
+    cmp   x0, x2, lsr #32                                         // compare against upper 32 bits
+    bne   .Lart_quick_resolve_string_slow_path
+    ubfx  x0, x2, #0, #32                                        // extract lower 32 bits into x0
+#ifdef USE_READ_BARRIER
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lart_quick_resolve_string_marking
+#endif
+    ret
+
+// Slow path case, the index did not match.
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
+    mov   x1, xSELF                                 // pass Thread::Current
+    bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+    ldr   x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz  x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+    // Save LR so that we can return, also x1 for alignment purposes.
+    stp    x1, xLR, [sp, #-16]!                     // Save x1, LR.
+    bl     artReadBarrierMark                       // Get the marked string back.
+    ldp    x1, xLR, [sp], #16                       // Restore registers.
+.Lart_quick_resolve_string_no_rb:
+    ret
+
+END art_quick_resolve_string
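The fast path above can be read as the following C++ sketch: each dex-cache string slot is a packed 64-bit pair whose high half is the string index and whose low half is the 32-bit compressed String reference. The parameter names and the power-of-two cache size are assumptions standing in for the STRING_DEX_CACHE_* macros.

    #include <cstdint>

    // Returns the compressed reference on a hit, 0 on a miss (slow path).
    inline uint32_t ResolveStringFastPath(uint32_t string_idx,
                                          const uint64_t* dex_cache_strings,
                                          uint32_t cache_size /* power of two */) {
      uint64_t pair = dex_cache_strings[string_idx & (cache_size - 1u)];
      if (static_cast<uint32_t>(pair >> 32) == string_idx) {
        return static_cast<uint32_t>(pair);   // ubfx x0, x2, #0, #32
      }
      return 0;                               // bne .Lart_quick_resolve_string_slow_path
    }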
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
@@ -1652,13 +1943,80 @@
     mov    x0, x3                                             // Set the return value and return.
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      // save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
     mov    x2, xSELF                       // pass Thread::Current
     bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Check null class
+    cbz    \wClass, \slowPathLabel
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
+.endm
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Array classes are never finalizable or uninitialized, no need to check.
+    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
+    UNPOISON_HEAP_REF \wTemp0
+    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
+                                                              // bits.
+                                                              // xCount is holding a 32 bit value,
+                                                              // it can not overflow.
+    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
+    // Add array data offset and alignment.
+    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+
+    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
+                                                              // component size shift is 3
+                                                              // (for 64 bit alignment).
+    and    \xTemp0, \xTemp0, #4
+    add    \xTemp1, \xTemp1, \xTemp0
+    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
+                                                              // (addr + 7) & ~7. The mask must
+                                                              // be 64 bits to keep high bits in
+                                                              // case of overflow.
+    // Negative-sized arrays are handled here since xCount holds a zero-extended 32-bit value.
+    // Negative ints become large 64-bit unsigned ints which will always be larger than max signed
+    // 32-bit int. Since the max shift for arrays is 3, it cannot become a negative 64-bit int.
+    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
+    bhs    \slowPathLabel                                     // path.
+
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
+                                                              // we use (end - begin) to handle
+                                                              // negative size arrays. It is
+                                                              // assumed that a negative size will
+                                                              // always compare greater, unsigned,
+                                                              // than the region size.
+    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
+    sub    \xTemp2, \xTemp2, \xTemp0
+    cmp    \xTemp1, \xTemp2
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+                                                              // Move old thread_local_pos to x0
+                                                              // for the return value.
+    mov    x0, \xTemp0
+    add    \xTemp0, \xTemp0, \xTemp1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
+    add    \xTemp0, \xTemp0, #1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF \wClass
+    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
+    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
+                                                              // Fence.
+    dmb    ishst
+    ret
+.endm
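The size computation in ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED boils down to the sketch below, a hedged C++ rendering in which OBJECT_ALIGNMENT_MASK is taken to be 7 and the extra 4 bytes are only added for a component size shift of 3 (64-bit elements):

    #include <cstdint>

    inline uint64_t ArrayAllocationSize(uint32_t count,
                                        uint32_t component_size_shift,
                                        uint64_t int_array_data_offset) {
      // lsl xTemp1, xCount, xTemp0 ; add #(MIRROR_INT_ARRAY_DATA_OFFSET + 7)
      uint64_t size = (static_cast<uint64_t>(count) << component_size_shift)
                      + int_array_data_offset + 7u;
      // add/and trick: add 4 only when the shift is 3 (long/double arrays).
      size += (component_size_shift + 1u) & 4u;
      // and xTemp1, xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64
      return size & ~UINT64_C(7);
    }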
+
 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
 // x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
@@ -1666,8 +2024,11 @@
 // Need to preserve x0 and x1 to the slow path.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
     cbz    x2, \slowPathLabel                                 // Check null class
-                                                              // Check class status.
-    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
     cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
     bne    \slowPathLabel
                                                               // Add a fake dependence from the
@@ -1680,6 +2041,10 @@
                                                               // a load-acquire for the status).
     eor    x3, x3, x3
     add    x2, x2, x3
+    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
                                                               // Check access flags has
                                                               // kAccClassIsFinalizable.
     ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
@@ -1734,31 +2099,44 @@
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // Save callee saves in case of GC.
+    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
     mov    x2, xSELF                     // Pass Thread::Current.
     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-ENTRY art_quick_alloc_object_region_tlab
+// The common code for art_quick_alloc_object_*region_tlab
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
     // Fast path region tlab allocation.
-    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
     // x2-x7: free.
 #if !defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
+.if \is_resolved
+    mov    x2, x0 // class is actually stored in x0 already
+.else
     ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
                                                               // Load the class (x2)
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-                                                              // Read barrier for class load.
+.endif
+    // Most common case: GC is not marking.
     ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
-    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    cbnz   x3, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w2, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
                                                               // The read barrier slow path. Mark
                                                               // the class.
     stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
@@ -1769,35 +2147,97 @@
     ldp    x0, x1, [sp, #0]                                   // Restore registers.
     ldr    xLR, [sp, #16]
     add    sp, sp, #32
-    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_object_region_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // Save callee saves in case of GC.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
     mov    x2, xSELF                           // Pass Thread::Current.
-    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_region_tlab
+END \name
+.endm
+
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+
+// The common code for art_quick_alloc_array_*region_tlab
+.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
+    // Fast path array allocation for region tlab allocation.
+    // x0: uint32_t type_idx
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+.if \is_resolved
+    mov    x3, x0
+    // If already resolved, class is stored in x0
+.else
+    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+.endif
+    // Most common case: GC is not marking.
+    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x4, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w3, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
+    stp    x2, xLR, [sp, #16]
+    mov    x0, x3                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x3, x0                                             // Get the (marked) class back.
+    ldp    x2, xLR, [sp, #16]
+    ldp    x0, x1, [sp], #32                                  // Restore registers.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3: Thread* self
+    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
+    mov    x3, xSELF                  // pass Thread::Current
+    bl     \entrypoint
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
+// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
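Both GENERATE_ALLOC_OBJECT_REGION_TLAB and GENERATE_ALLOC_ARRAY_REGION_TLAB share the same read-barrier preamble, which the following sketch restates in C++. Signatures and type names are illustrative; artReadBarrierMark is the only real runtime entrypoint being modeled, and in the assembly the lock word is only loaded after the null check.

    #include <cstdint>

    using Class = void;   // stand-in for mirror::Class

    inline const Class* MarkClassIfNeeded(const Class* klass,
                                          uint32_t lock_word,
                                          bool gc_is_marking,
                                          uint32_t mark_bit_shift,
                                          const Class* (*mark)(const Class*)) {
      if (!gc_is_marking) return klass;                      // common case: .Ldo_allocation
      if (klass == nullptr) return nullptr;                  // null class: take the slow path
      if ((lock_word >> mark_bit_shift) & 1u) return klass;  // mark bit set: allocate
      return mark(klass);                                    // artReadBarrierMark(klass)
    }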
 
     /*
      * Called by managed code when the thread has been asked to suspend.
      */
     .extern artTestSuspendFromCode
 ENTRY art_quick_test_suspend
-    ldrh   w0, [xSELF, #THREAD_FLAGS_OFFSET]  // get xSELF->state_and_flags.as_struct.flags
-    cbnz   w0, .Lneed_suspend                 // check flags == 0
-    ret                                       // return if flags == 0
-.Lneed_suspend:
+    SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
 END art_quick_test_suspend
 
 ENTRY art_quick_implicit_suspend
     mov    x0, xSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
 END art_quick_implicit_suspend
 
      /*
@@ -1807,17 +2247,17 @@
      */
      .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
     ret                                 // return on success
 .Lexception_in_proxy:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
@@ -1857,17 +2297,17 @@
 END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
     mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     br xIP0
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
@@ -1927,7 +2367,7 @@
      * Called to do a generic JNI down-call
      */
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_X0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
 
     // Save SP, so we can have static CFI info.
     mov x28, sp
@@ -1999,7 +2439,7 @@
     .cfi_def_cfa_register sp
 
     // Tear down the callee-save frame.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     // store into fpr, for when it's a fpr return...
     fmov d0, x0
@@ -2021,7 +2461,7 @@
  * x1..x7, d0..d7 = arguments to that method.
  */
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
 
     //  x0 will contain mirror::ArtMethod* method.
     mov x1, xSELF                          // How to get Thread::Current() ???
@@ -2031,7 +2471,7 @@
     //                                      mirror::ArtMethod** sp)
     bl   artQuickToInterpreterBridge
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
 
     fmov d0, x0
 
@@ -2044,7 +2484,7 @@
 //
     .extern artInstrumentationMethodEntryFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     mov   x20, x0             // Preserve method reference in a callee-save.
 
@@ -2055,7 +2495,7 @@
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
 END art_quick_instrumentation_entry
@@ -2064,7 +2504,7 @@
 ENTRY art_quick_instrumentation_exit
     mov   xLR, #0             // Clobber LR for later checks.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
@@ -2087,7 +2527,7 @@
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
-    POP_REFS_ONLY_CALLEE_SAVE_FRAME
+    POP_SAVE_REFS_ONLY_FRAME
 
     br    xIP0                // Tail-call out.
 END art_quick_instrumentation_exit
@@ -2098,7 +2538,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF          // Pass thread.
     bl     artDeoptimize      // artDeoptimize(Thread*)
     brk 0
@@ -2110,7 +2550,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF                      // Pass thread.
     bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
     brk 0
@@ -2209,107 +2649,158 @@
     ret
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * TODO: Not very optimized.
-     *
-     * On entry:
-     *    x0:   this object pointer
-     *    x1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    x2, x0         // x0 is return, use x2 for first input.
-    sub    x0, x2, x1     // Same string object?
-    cbnz   x0,1f
-    ret
-1:                        // Different string objects.
-
-    ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
-    add    x2, x2, #MIRROR_STRING_VALUE_OFFSET
-    add    x1, x1, #MIRROR_STRING_VALUE_OFFSET
-
     /*
-     * Now:           Data*  Count
-     *    first arg    x2      w4
-     *   second arg    x1      w3
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through W register
+     * `wreg` (corresponding to X register `xreg`), saving and restoring
+     * all caller-save registers.
+     *
+     * If `wreg` is different from `w0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `wreg` is used to pass the (sole) argument of this
+     *   function (instead of W0);
+     * - register `wreg` is used to return the result of this function
+     *   (instead of W0);
+     * - W0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
      */
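To make the non-standard convention concrete, here is one hypothetical way compiled code could call the w5 variant from C++ (GCC/Clang extended asm, a sketch only): the reference goes in and comes out in w5/x5, while ip0 (clobbered as a scratch on the fast path), lr, flags, and memory are the only other things the stub may touch.

    extern "C" void art_quick_read_barrier_mark_reg05();   // provided by the runtime, see below

    inline void* MarkThroughX5(void* ref) {
      register void* x5_val __asm__("x5") = ref;            // argument lives in w5/x5
      __asm__ volatile("bl art_quick_read_barrier_mark_reg05"
                       : "+r"(x5_val)
                       :
                       : "x16", "x30", "cc", "memory");     // ip0, lr, flags, heap
      return x5_val;                                        // result also comes back in w5/x5
    }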
-
-    // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
-    subs x0, x4, x3
-    // Min(count1, count2) into w3.
-    csel x3, x3, x4, ge
-
-    // TODO: Tune this value.
-    // Check for long string, do memcmp16 for them.
-    cmp w3, #28  // Constant from arm32.
-    bgt .Ldo_memcmp16
-
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
+ENTRY \name
+    // Reference is null, no work to do at all.
+    cbz \wreg, .Lret_rb_\name
     /*
-     * Now:
-     *   x2: *first string data
-     *   x1: *second string data
-     *   w3: iteration count
-     *   x0: return value if comparison equal
-     *   x4, x5, x6, x7: free
+     * Allocate 46 stack slots * 8 = 368 bytes:
+     * - 20 slots for core registers X0-X19
+     * - 24 slots for floating-point registers D0-D7 and D16-D31
+     * -  1 slot for return address register XLR
+     * -  1 padding slot for 16-byte stack alignment
      */
-
-    // Do a simple unrolled loop.
-.Lloop:
-    // At least two more elements?
-    subs w3, w3, #2
-    b.lt .Lremainder_or_done
-
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-
-    ldrh w6, [x2], #2
-    ldrh w7, [x1], #2
-
-    subs w4, w4, w5
-    b.ne .Lw4_result
-
-    subs w6, w6, w7
-    b.ne .Lw6_result
-
-    b .Lloop
-
-.Lremainder_or_done:
-    adds w3, w3, #1
-    b.eq .Lremainder
+    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
+    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
     ret
+.Lslow_path_rb_\name:
+    // Save all potentially live caller-save core registers.
+    stp   x0, x1,   [sp, #-368]!
+    .cfi_adjust_cfa_offset 368
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp   x2, x3,   [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x3, 24
+    stp   x4, x5,   [sp, #32]
+    .cfi_rel_offset x4, 32
+    .cfi_rel_offset x5, 40
+    stp   x6, x7,   [sp, #48]
+    .cfi_rel_offset x6, 48
+    .cfi_rel_offset x7, 56
+    stp   x8, x9,   [sp, #64]
+    .cfi_rel_offset x8, 64
+    .cfi_rel_offset x9, 72
+    stp   x10, x11, [sp, #80]
+    .cfi_rel_offset x10, 80
+    .cfi_rel_offset x11, 88
+    stp   x12, x13, [sp, #96]
+    .cfi_rel_offset x12, 96
+    .cfi_rel_offset x13, 104
+    stp   x14, x15, [sp, #112]
+    .cfi_rel_offset x14, 112
+    .cfi_rel_offset x15, 120
+    stp   x16, x17, [sp, #128]
+    .cfi_rel_offset x16, 128
+    .cfi_rel_offset x17, 136
+    stp   x18, x19, [sp, #144]
+    .cfi_rel_offset x18, 144
+    .cfi_rel_offset x19, 152
+    // Save all potentially live caller-save floating-point registers.
+    stp   d0, d1,   [sp, #160]
+    stp   d2, d3,   [sp, #176]
+    stp   d4, d5,   [sp, #192]
+    stp   d6, d7,   [sp, #208]
+    stp   d16, d17, [sp, #224]
+    stp   d18, d19, [sp, #240]
+    stp   d20, d21, [sp, #256]
+    stp   d22, d23, [sp, #272]
+    stp   d24, d25, [sp, #288]
+    stp   d26, d27, [sp, #304]
+    stp   d28, d29, [sp, #320]
+    stp   d30, d31, [sp, #336]
+    // Save return address.
+    str   xLR,      [sp, #352]
+    .cfi_rel_offset x30, 352
+    // (sp + #360 is a padding slot)
 
-.Lremainder:
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-    subs w4, w4, w5
-    b.ne .Lw4_result
+    .ifnc \wreg, w0
+      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
+    .endif
+    bl    artReadBarrierMark            // artReadBarrierMark(obj)
+    .ifnc \wreg, w0
+      mov   \wreg, w0                   // Return result into `wreg`
+    .endif
+
+    // Restore core regs, except `xreg`, as `wreg` is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REGS_NE x0, x1,   0,   \xreg
+    POP_REGS_NE x2, x3,   16,  \xreg
+    POP_REGS_NE x4, x5,   32,  \xreg
+    POP_REGS_NE x6, x7,   48,  \xreg
+    POP_REGS_NE x8, x9,   64,  \xreg
+    POP_REGS_NE x10, x11, 80,  \xreg
+    POP_REGS_NE x12, x13, 96,  \xreg
+    POP_REGS_NE x14, x15, 112, \xreg
+    POP_REGS_NE x16, x17, 128, \xreg
+    POP_REGS_NE x18, x19, 144, \xreg
+    // Restore floating-point registers.
+    ldp   d0, d1,   [sp, #160]
+    ldp   d2, d3,   [sp, #176]
+    ldp   d4, d5,   [sp, #192]
+    ldp   d6, d7,   [sp, #208]
+    ldp   d16, d17, [sp, #224]
+    ldp   d18, d19, [sp, #240]
+    ldp   d20, d21, [sp, #256]
+    ldp   d22, d23, [sp, #272]
+    ldp   d24, d25, [sp, #288]
+    ldp   d26, d27, [sp, #304]
+    ldp   d28, d29, [sp, #320]
+    ldp   d30, d31, [sp, #336]
+    // Restore return address and remove padding.
+    ldr   xLR,      [sp, #352]
+    .cfi_restore x30
+    add sp, sp, #368
+    .cfi_adjust_cfa_offset -368
+.Lret_rb_\name:
     ret
+END \name
+.endm
 
-// Result is in w4
-.Lw4_result:
-    sxtw x0, w4
-    ret
-
-// Result is in w6
-.Lw6_result:
-    sxtw x0, w6
-    ret
-
-.Ldo_memcmp16:
-    mov x14, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
-    mov x15, xLR                 //                 TODO: Codify and check that?
-
-    mov x0, x2
-    uxtw x2, w3
-    bl __memcmp16
-
-    mov xLR, x15                 // Restore LR.
-
-    cmp x0, #0                   // Check the memcmp difference.
-    csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
-    ret
-END art_quick_string_compareto
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16  // No stub for x16: ip0 is blocked.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index b525309..36f283b 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -29,7 +29,7 @@
 static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
     // Note: ArtMethod::GetReturnPcOffsetInBytes() relies on the assumption that
     // LR is always saved on the top of the frame for all targets.
-    // That is, lr = *(sp + framesize - pointsize).
+    // That is, lr = *(sp + framesize - pointer_size).
     (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
@@ -44,6 +44,14 @@
     (1 << art::arm64::X7);
 static constexpr uint32_t kArm64CalleeSaveAllSpills =
     (1 << art::arm64::X19);
+static constexpr uint32_t kArm64CalleeSaveEverythingSpills =
+    (1 << art::arm64::X0) | (1 << art::arm64::X1) | (1 << art::arm64::X2) |
+    (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) |
+    (1 << art::arm64::X6) | (1 << art::arm64::X7) | (1 << art::arm64::X8) |
+    (1 << art::arm64::X9) | (1 << art::arm64::X10) | (1 << art::arm64::X11) |
+    (1 << art::arm64::X12) | (1 << art::arm64::X13) | (1 << art::arm64::X14) |
+    (1 << art::arm64::X15) | (1 << art::arm64::X16) | (1 << art::arm64::X17) |
+    (1 << art::arm64::X18) | (1 << art::arm64::X19);
 
 static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
@@ -55,23 +63,37 @@
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
+static constexpr uint32_t kArm64CalleeSaveFpEverythingSpills =
+    (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+    (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+    (1 << art::arm64::D6) | (1 << art::arm64::D7) | (1 << art::arm64::D8) |
+    (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) |
+    (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) |
+    (1 << art::arm64::D15) | (1 << art::arm64::D16) | (1 << art::arm64::D17) |
+    (1 << art::arm64::D18) | (1 << art::arm64::D19) | (1 << art::arm64::D20) |
+    (1 << art::arm64::D21) | (1 << art::arm64::D22) | (1 << art::arm64::D23) |
+    (1 << art::arm64::D24) | (1 << art::arm64::D25) | (1 << art::arm64::D26) |
+    (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) |
+    (1 << art::arm64::D30) | (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(Arm64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Arm64CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kArm64PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kArm64PointerSize), kStackAlignment);
 }
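A worked instance of this formula for the new kSaveEverything type (hedged: the register sets are read off the kArm64CalleeSave*Spills constants above, with kArm64CalleeSaveRefSpills assumed to be X20-X29): 1 (LR) + 10 (X20-X29) + 20 (X0-X19) core spills, 32 FP spills (D0-D31), plus the Method* slot gives 64 slots * 8 bytes = 512 bytes, matching the 512-byte frame set up in quick_entrypoints_arm64.S. A compile-time check along these lines could read:

    static_assert(Arm64CalleeSaveFrameSize(Runtime::kSaveEverything) == 512,
                  "kSaveEverything frame size must match quick_entrypoints_arm64.S");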
 
 constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
@@ -83,17 +105,18 @@
 constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
          (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
-          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * kArm64PointerSize;
+          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
-         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * kArm64PointerSize;
+         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * static_cast<size_t>(kArm64PointerSize);
 }
 
 constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
-      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) * kArm64PointerSize;
+      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) *
+      static_cast<size_t>(kArm64PointerSize);
 }
 
 }  // namespace arm64
diff --git a/runtime/arch/arm64/thread_arm64.cc b/runtime/arch/arm64/thread_arm64.cc
index 564dced..3483b70 100644
--- a/runtime/arch/arm64/thread_arm64.cc
+++ b/runtime/arch/arm64/thread_arm64.cc
@@ -17,15 +17,16 @@
 #include "thread.h"
 
 #include "asm_support_arm64.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<8>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k64>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc
index 81ca010..b35e088 100644
--- a/runtime/arch/instruction_set.cc
+++ b/runtime/arch/instruction_set.cc
@@ -18,6 +18,7 @@
 
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
 #include "../elf.h"
+#include "base/bit_utils.h"
 #include "globals.h"
 
 namespace art {
@@ -113,14 +114,44 @@
   }
 }
 
-static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
-static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
-static constexpr size_t kMips64StackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+#if !defined(ART_STACK_OVERFLOW_GAP_arm) || !defined(ART_STACK_OVERFLOW_GAP_arm64) || \
+    !defined(ART_STACK_OVERFLOW_GAP_mips) || !defined(ART_STACK_OVERFLOW_GAP_mips64) || \
+    !defined(ART_STACK_OVERFLOW_GAP_x86) || !defined(ART_STACK_OVERFLOW_GAP_x86_64)
+#error "Missing defines for stack overflow gap"
+#endif
 
-static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
-static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes    = ART_STACK_OVERFLOW_GAP_arm;
+static constexpr size_t kArm64StackOverflowReservedBytes  = ART_STACK_OVERFLOW_GAP_arm64;
+static constexpr size_t kMipsStackOverflowReservedBytes   = ART_STACK_OVERFLOW_GAP_mips;
+static constexpr size_t kMips64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_mips64;
+static constexpr size_t kX86StackOverflowReservedBytes    = ART_STACK_OVERFLOW_GAP_x86;
+static constexpr size_t kX86_64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_x86_64;
+
+static_assert(IsAligned<kPageSize>(kArmStackOverflowReservedBytes), "ARM gap not page aligned");
+static_assert(IsAligned<kPageSize>(kArm64StackOverflowReservedBytes), "ARM64 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kMipsStackOverflowReservedBytes), "Mips gap not page aligned");
+static_assert(IsAligned<kPageSize>(kMips64StackOverflowReservedBytes),
+              "Mips64 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kX86StackOverflowReservedBytes), "X86 gap not page aligned");
+static_assert(IsAligned<kPageSize>(kX86_64StackOverflowReservedBytes),
+              "X86_64 gap not page aligned");
+
+#if !defined(ART_FRAME_SIZE_LIMIT)
+#error "ART frame size limit missing"
+#endif
+
+// TODO: Should we require an extra page (RoundUp(SIZE) + kPageSize)?
+static_assert(ART_FRAME_SIZE_LIMIT < kArmStackOverflowReservedBytes, "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kArm64StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kMipsStackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kMips64StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kX86StackOverflowReservedBytes,
+              "Frame size limit too large");
+static_assert(ART_FRAME_SIZE_LIMIT < kX86_64StackOverflowReservedBytes,
+              "Frame size limit too large");
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa) {
   switch (isa) {
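
The per-ISA gap values now come from build-time defines and must be page aligned; each IsAligned check reduces to a power-of-two mask test. A standalone sketch of that test (a kPageSize of 4096 is an assumption here):

// Standalone sketch, not part of the patch.
#include <cstddef>

static constexpr size_t kPageSize = 4096;  // assumed value

template <size_t kAlignment>
constexpr bool IsAligned(size_t value) {
  // Valid for power-of-two alignments only.
  return (value & (kAlignment - 1)) == 0u;
}

static_assert(IsAligned<kPageSize>(8 * 1024), "an 8 KiB gap is page aligned");
static_assert(!IsAligned<kPageSize>(8 * 1024 + 512), "a non-multiple is rejected");
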
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
index ff9c0b3..917acc9 100644
--- a/runtime/arch/instruction_set.h
+++ b/runtime/arch/instruction_set.h
@@ -20,6 +20,7 @@
 #include <iosfwd>
 #include <string>
 
+#include "base/enums.h"
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 
 namespace art {
@@ -53,12 +54,12 @@
 #endif
 
 // Architecture-specific pointer sizes
-static constexpr size_t kArmPointerSize = 4;
-static constexpr size_t kArm64PointerSize = 8;
-static constexpr size_t kMipsPointerSize = 4;
-static constexpr size_t kMips64PointerSize = 8;
-static constexpr size_t kX86PointerSize = 4;
-static constexpr size_t kX86_64PointerSize = 8;
+static constexpr PointerSize kArmPointerSize = PointerSize::k32;
+static constexpr PointerSize kArm64PointerSize = PointerSize::k64;
+static constexpr PointerSize kMipsPointerSize = PointerSize::k32;
+static constexpr PointerSize kMips64PointerSize = PointerSize::k64;
+static constexpr PointerSize kX86PointerSize = PointerSize::k32;
+static constexpr PointerSize kX86_64PointerSize = PointerSize::k64;
 
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8.
@@ -82,7 +83,7 @@
 
 InstructionSet GetInstructionSetFromELF(uint16_t e_machine, uint32_t e_flags);
 
-static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
+static inline PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -147,8 +148,8 @@
   }
 }
 
-static inline size_t InstructionSetPointerSize(InstructionSet isa) {
-  return Is64BitInstructionSet(isa) ? 8U : 4U;
+static inline PointerSize InstructionSetPointerSize(InstructionSet isa) {
+  return Is64BitInstructionSet(isa) ? PointerSize::k64 : PointerSize::k32;
 }
 
 static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
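
Replacing the size_t constants with a scoped enum means a raw 8 can no longer be passed where a pointer width is expected, and conversions must be spelled out with static_cast. A sketch of the shape of that enum and the ISA mapping (the real definition lives in base/enums.h; enumerators and values here are assumptions):

// Standalone sketch, not part of the patch.
#include <cstddef>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };

enum InstructionSet { kArm, kArm64, kMips, kMips64, kX86, kX86_64 };

constexpr bool Is64BitInstructionSet(InstructionSet isa) {
  return isa == kArm64 || isa == kMips64 || isa == kX86_64;
}

constexpr PointerSize InstructionSetPointerSize(InstructionSet isa) {
  return Is64BitInstructionSet(isa) ? PointerSize::k64 : PointerSize::k32;
}

static_assert(InstructionSetPointerSize(kArm64) == PointerSize::k64, "arm64: 8-byte pointers");
static_assert(InstructionSetPointerSize(kMips) == PointerSize::k32, "mips32: 4-byte pointers");
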
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
index 99c2d4d..fb38b47 100644
--- a/runtime/arch/instruction_set_features_test.cc
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -26,7 +26,7 @@
 
 namespace art {
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #if defined(__aarch64__)
 TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyVariant) {
   LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
@@ -111,7 +111,7 @@
 }
 #endif
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 TEST(InstructionSetFeaturesTest, HostFeaturesFromCppDefines) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> default_features(
diff --git a/runtime/arch/instruction_set_test.cc b/runtime/arch/instruction_set_test.cc
index 2f3cf18..5dfc4b4 100644
--- a/runtime/arch/instruction_set_test.cc
+++ b/runtime/arch/instruction_set_test.cc
@@ -18,6 +18,7 @@
 
 #include <gtest/gtest.h>
 
+#include "base/enums.h"
 #include "base/stringprintf.h"
 
 namespace art {
@@ -49,7 +50,7 @@
 }
 
 TEST(InstructionSetTest, PointerSize) {
-  EXPECT_EQ(sizeof(void*), GetInstructionSetPointerSize(kRuntimeISA));
+  EXPECT_EQ(kRuntimePointerSize, GetInstructionSetPointerSize(kRuntimeISA));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 453056d..135b074 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 96
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96
+#define FRAME_SIZE_SAVE_REFS_ONLY 48
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 80
+#define FRAME_SIZE_SAVE_EVERYTHING 256
 
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 0d01ad5..937cd1e 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,7 +45,7 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
-      entrypoint == kQuickReadBarrierMark ||
+      entrypoint == kQuickReadBarrierJni ||
       entrypoint == kQuickReadBarrierSlow ||
       entrypoint == kQuickReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 45e33a8..09f8849 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -28,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -268,6 +268,8 @@
   static_assert(!IsDirectEntrypoint(kQuickThrowNullPointer), "Non-direct C stub marked direct.");
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
   static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct.");
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
+  static_assert(!IsDirectEntrypoint(kQuickThrowStringBounds), "Non-direct C stub marked direct.");
 
   // Deoptimization from compiled code.
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
@@ -281,9 +283,99 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMark = artReadBarrierMark;
-  static_assert(IsDirectEntrypoint(kQuickReadBarrierMark), "Direct C stub not marked direct.");
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS32.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg16),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg23),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg24),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg25),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg26),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg27),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg28),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
+                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
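
Each of the new per-register marking entry points is set to null and immediately checked against IsDirectEntrypoint, so a mismatch between the entry-point table and the direct-entrypoint list fails at compile time. The pattern, reduced to a standalone sketch (entrypoint names here are hypothetical):

// Standalone sketch, not part of the patch.
enum QuickEntrypointEnum { kQuickFoo, kQuickBar };  // hypothetical entrypoints

constexpr bool IsDirectEntrypoint(QuickEntrypointEnum entrypoint) {
  // On MIPS a "direct" entry point jumps straight to the C implementation,
  // without going through an assembly stub.
  return entrypoint == kQuickBar;
}

static_assert(!IsDirectEntrypoint(kQuickFoo), "Non-direct C stub marked direct.");
static_assert(IsDirectEntrypoint(kQuickBar), "Direct C stub not marked direct.");
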
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 8ea78eb..7969a8f 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,10 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +82,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
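
The handler rewrites the saved register state so that, when the kernel returns from the signal, execution resumes in art_quick_throw_null_pointer_exception_from_signal with the faulting address in $a0 and $ra pointing just past the faulting instruction. A simplified sketch of that register shuffle (the context struct below is a stand-in for the real sigcontext):

// Standalone sketch, not part of the patch.
#include <cstdint>

struct FakeSigcontext {       // stand-in for the kernel's struct sigcontext
  uintptr_t sc_pc;
  uintptr_t sc_regs[32];
};

namespace mips { enum { A0 = 4, T9 = 25, RA = 31 }; }

// Redirect the interrupted thread to `thrower`, passing `fault_addr` as the first argument.
void RedirectToThrower(FakeSigcontext* sc, uintptr_t thrower, uintptr_t fault_addr) {
  sc->sc_regs[mips::RA] = sc->sc_pc + 4;  // return address: the instruction after the fault
  sc->sc_pc = thrower;                    // resume execution in the thrower
  sc->sc_regs[mips::T9] = sc->sc_pc;      // PIC convention: $t9 must hold the callee address
  sc->sc_regs[mips::A0] = fault_addr;     // first argument: the faulting address
}
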
@@ -111,7 +116,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +139,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 93d79b7..b3a9866 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -76,21 +76,22 @@
   GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   // Override defaults based on variant string.
-  // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
+  // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit.
   constexpr const char* kMips32Prefix = "mips32r";
   const size_t kPrefixLength = strlen(kMips32Prefix);
   if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 &&
       variant.size() > kPrefixLength) {
-    if (variant[kPrefixLength] >= '6') {
-      fpu_32bit = false;
-      r6 = true;
-    }
-    if (variant[kPrefixLength] >= '2') {
-      mips_isa_gte2 = true;
-    }
+    r6 = (variant[kPrefixLength] >= '6');
+    fpu_32bit = (variant[kPrefixLength] < '5');
+    mips_isa_gte2 = (variant[kPrefixLength] >= '2');
   } else if (variant == "default") {
-    // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional
-    // setting.
+    // Default variant is: smp = true, has FPU, is gte2. This is the traditional setting.
+    //
+    // Note, we get FPU bitness and R6-ness from the build (using cpp defines, see above)
+    // and don't override them because many things depend on the "default" variant being
+    // sufficient for most purposes. That is, "default" should work for both R2 and R6.
+    // Use "mips32r#" to get a specific configuration, possibly not matching the runtime
+    // ISA (e.g. for ISA-specific testing of dex2oat internals).
     mips_isa_gte2 = true;
   } else {
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index aac436e..120dc1c 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -81,8 +81,19 @@
 
  private:
   MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6)
-      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit),  mips_isa_gte2_(mips_isa_gte2), r6_(r6)
-  {}
+      : InstructionSetFeatures(smp),
+        fpu_32bit_(fpu_32bit),
+        mips_isa_gte2_(mips_isa_gte2),
+        r6_(r6) {
+    // Sanity checks.
+    if (r6) {
+      CHECK(mips_isa_gte2);
+      CHECK(!fpu_32bit);
+    }
+    if (!mips_isa_gte2) {
+      CHECK(fpu_32bit);
+    }
+  }
 
   // Bitmap positions for encoding features as a bitmap.
   enum {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 3ee26af..3d393f6 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -30,19 +30,19 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * Callee-save: $s0-$s8 + $gp + $ra, 11 total + 1 word for Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     addiu  $sp, $sp, -96
     .cfi_adjust_cfa_offset 96
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 96)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 96)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected."
 #endif
 
     sw     $ra, 92($sp)
@@ -79,7 +79,7 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -88,20 +88,20 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: $s2-$s8 + $gp + $ra, 9 total + 2 words padding + 1 word to hold Method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_ONLY + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, -48
     .cfi_adjust_cfa_offset 48
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 48)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 48)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS) size not as expected."
 #endif
 
     sw     $ra, 44($sp)
@@ -126,14 +126,14 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 44($sp)
@@ -158,24 +158,24 @@
     .cfi_adjust_cfa_offset -48
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
     jalr   $zero, $ra
     nop
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
      * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     addiu  $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 80)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
 #endif
 
     sw     $ra, 76($sp)
@@ -209,17 +209,17 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $t0 and $sp
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
@@ -228,22 +228,22 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes non-moving GC.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
      * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
      * Clobbers $sp
      * Use $a0 as the Method* and loads it into bottom of stack.
      * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
-     * Reserves FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack
+     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
     sw $a0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
     addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
     lw     $ra, 76($sp)
@@ -277,11 +277,208 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
+     *              28 (GPR) + 32 (FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -256
+    .cfi_adjust_cfa_offset 256
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 256)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
+#endif
+
+    sw     $ra, 252($sp)
+    .cfi_rel_offset 31, 252
+    sw     $fp, 248($sp)
+    .cfi_rel_offset 30, 248
+    sw     $gp, 244($sp)
+    .cfi_rel_offset 28, 244
+    sw     $t9, 240($sp)
+    .cfi_rel_offset 25, 240
+    sw     $t8, 236($sp)
+    .cfi_rel_offset 24, 236
+    sw     $s7, 232($sp)
+    .cfi_rel_offset 23, 232
+    sw     $s6, 228($sp)
+    .cfi_rel_offset 22, 228
+    sw     $s5, 224($sp)
+    .cfi_rel_offset 21, 224
+    sw     $s4, 220($sp)
+    .cfi_rel_offset 20, 220
+    sw     $s3, 216($sp)
+    .cfi_rel_offset 19, 216
+    sw     $s2, 212($sp)
+    .cfi_rel_offset 18, 212
+    sw     $s1, 208($sp)
+    .cfi_rel_offset 17, 208
+    sw     $s0, 204($sp)
+    .cfi_rel_offset 16, 204
+    sw     $t7, 200($sp)
+    .cfi_rel_offset 15, 200
+    sw     $t6, 196($sp)
+    .cfi_rel_offset 14, 196
+    sw     $t5, 192($sp)
+    .cfi_rel_offset 13, 192
+    sw     $t4, 188($sp)
+    .cfi_rel_offset 12, 188
+    sw     $t3, 184($sp)
+    .cfi_rel_offset 11, 184
+    sw     $t2, 180($sp)
+    .cfi_rel_offset 10, 180
+    sw     $t1, 176($sp)
+    .cfi_rel_offset 9, 176
+    sw     $t0, 172($sp)
+    .cfi_rel_offset 8, 172
+    sw     $a3, 168($sp)
+    .cfi_rel_offset 7, 168
+    sw     $a2, 164($sp)
+    .cfi_rel_offset 6, 164
+    sw     $a1, 160($sp)
+    .cfi_rel_offset 5, 160
+    sw     $a0, 156($sp)
+    .cfi_rel_offset 4, 156
+    sw     $v1, 152($sp)
+    .cfi_rel_offset 3, 152
+    sw     $v0, 148($sp)
+    .cfi_rel_offset 2, 148
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sw     $at, 144($sp)
+    .cfi_rel_offset 1, 144
+    .set pop
+1:
+    .cpload $ra
+
+    SDu $f30, $f31, 136, $sp, $t1
+    SDu $f28, $f29, 128, $sp, $t1
+    SDu $f26, $f27, 120, $sp, $t1
+    SDu $f24, $f25, 112, $sp, $t1
+    SDu $f22, $f23, 104, $sp, $t1
+    SDu $f20, $f21, 96,  $sp, $t1
+    SDu $f18, $f19, 88,  $sp, $t1
+    SDu $f16, $f17, 80,  $sp, $t1
+    SDu $f14, $f15, 72,  $sp, $t1
+    SDu $f12, $f13, 64,  $sp, $t1
+    SDu $f10, $f11, 56,  $sp, $t1
+    SDu $f8, $f9, 48,  $sp, $t1
+    SDu $f6, $f7, 40,  $sp, $t1
+    SDu $f4, $f5, 32,  $sp, $t1
+    SDu $f2, $f3, 24,  $sp, $t1
+    SDu $f0, $f1, 16,  $sp, $t1
+
+    # 3 words padding and 1 word for holding Method*
+
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
+    lw $t0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t0)
+    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
+    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
+    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
+    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
+
+    LDu $f30, $f31, 136, $sp, $t1
+    LDu $f28, $f29, 128, $sp, $t1
+    LDu $f26, $f27, 120, $sp, $t1
+    LDu $f24, $f25, 112, $sp, $t1
+    LDu $f22, $f23, 104, $sp, $t1
+    LDu $f20, $f21, 96,  $sp, $t1
+    LDu $f18, $f19, 88,  $sp, $t1
+    LDu $f16, $f17, 80,  $sp, $t1
+    LDu $f14, $f15, 72,  $sp, $t1
+    LDu $f12, $f13, 64,  $sp, $t1
+    LDu $f10, $f11, 56,  $sp, $t1
+    LDu $f8, $f9, 48,  $sp, $t1
+    LDu $f6, $f7, 40,  $sp, $t1
+    LDu $f4, $f5, 32,  $sp, $t1
+    LDu $f2, $f3, 24,  $sp, $t1
+    LDu $f0, $f1, 16,  $sp, $t1
+
+    lw     $ra, 252($sp)
+    .cfi_restore 31
+    lw     $fp, 248($sp)
+    .cfi_restore 30
+    lw     $gp, 244($sp)
+    .cfi_restore 28
+    lw     $t9, 240($sp)
+    .cfi_restore 25
+    lw     $t8, 236($sp)
+    .cfi_restore 24
+    lw     $s7, 232($sp)
+    .cfi_restore 23
+    lw     $s6, 228($sp)
+    .cfi_restore 22
+    lw     $s5, 224($sp)
+    .cfi_restore 21
+    lw     $s4, 220($sp)
+    .cfi_restore 20
+    lw     $s3, 216($sp)
+    .cfi_restore 19
+    lw     $s2, 212($sp)
+    .cfi_restore 18
+    lw     $s1, 208($sp)
+    .cfi_restore 17
+    lw     $s0, 204($sp)
+    .cfi_restore 16
+    lw     $t7, 200($sp)
+    .cfi_restore 15
+    lw     $t6, 196($sp)
+    .cfi_restore 14
+    lw     $t5, 192($sp)
+    .cfi_restore 13
+    lw     $t4, 188($sp)
+    .cfi_restore 12
+    lw     $t3, 184($sp)
+    .cfi_restore 11
+    lw     $t2, 180($sp)
+    .cfi_restore 10
+    lw     $t1, 176($sp)
+    .cfi_restore 9
+    lw     $t0, 172($sp)
+    .cfi_restore 8
+    lw     $a3, 168($sp)
+    .cfi_restore 7
+    lw     $a2, 164($sp)
+    .cfi_restore 6
+    lw     $a1, 160($sp)
+    .cfi_restore 5
+    lw     $a0, 156($sp)
+    .cfi_restore 4
+    lw     $v1, 152($sp)
+    .cfi_restore 3
+    lw     $v0, 148($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    lw     $at, 144($sp)
+    .cfi_restore 1
+    .set pop
+
+    addiu  $sp, $sp, 256            # pop frame
+    .cfi_adjust_cfa_offset -256
+.endm
+
+    /*
      * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     la      $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -289,7 +486,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $t0, 1f                       # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -299,7 +496,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bnez   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -309,7 +506,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beqz   $v0, 1f                       # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -489,7 +686,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -500,18 +697,30 @@
      */
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
+
+    /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la   $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
     /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -522,18 +731,30 @@
      */
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la   $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -544,7 +765,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -567,13 +788,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     la    $t9, \cxx_name
     jalr  $t9                              # (method_idx, this, Thread*, $sp)
     addiu $a3, $sp, ARG_SLOT_SIZE          # pass $sp (remove arg slots)
     move  $a0, $v0                         # save target Method*
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beqz  $v0, 1f
     move  $t9, $v1                         # save $v0->code_
     jalr  $zero, $t9
@@ -884,11 +1105,11 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    lw     $a2, 0($sp)                    # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case exception allocation triggers GC
+    lw     $a2, 0($sp)                # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la     $t9, artHandleFillArrayDataFromCode
-    jalr   $t9                            # (payload offset, Array*, method, Thread*)
-    move   $a3, rSELF                     # pass Thread::Current
+    jalr   $t9                        # (payload offset, Array*, method, Thread*)
+    move   $a3, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_handle_fill_data
 
@@ -899,7 +1120,7 @@
 ENTRY art_quick_lock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     la      $t9, artLockObjectFromCode
     jalr    $t9                           # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
@@ -909,7 +1130,7 @@
 ENTRY art_quick_lock_object_no_inline
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     la      $t9, artLockObjectFromCode
     jalr    $t9                           # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
@@ -923,7 +1144,7 @@
 ENTRY art_quick_unlock_object
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la      $t9, artUnlockObjectFromCode
     jalr    $t9                       # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
@@ -933,7 +1154,7 @@
 ENTRY art_quick_unlock_object_no_inline
     beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
     la      $t9, artUnlockObjectFromCode
     jalr    $t9                       # (Object* obj, Thread*)
     move    $a1, rSELF                # pass Thread::Current
@@ -969,7 +1190,7 @@
     lw     $a0, 0($sp)
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -1122,7 +1343,7 @@
     .cfi_adjust_cfa_offset -32
     bnez   $v0, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move $a1, $a2
     la   $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -1135,7 +1356,7 @@
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetBooleanStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1147,7 +1368,7 @@
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetByteStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1160,7 +1381,7 @@
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetCharStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1172,7 +1393,7 @@
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetShortStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1185,7 +1406,7 @@
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet32StaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1198,7 +1419,7 @@
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet64StaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1211,7 +1432,7 @@
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetObjStaticFromCode
     jalr   $t9                           # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
@@ -1224,7 +1445,7 @@
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetBooleanInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1236,7 +1457,7 @@
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetByteInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1249,7 +1470,7 @@
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetCharInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1261,7 +1482,7 @@
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetShortInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1274,7 +1495,7 @@
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet32InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1287,7 +1508,7 @@
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGet64InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1300,7 +1521,7 @@
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artGetObjInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1313,7 +1534,7 @@
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet8StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1326,7 +1547,7 @@
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet16StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*, $sp)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1339,7 +1560,7 @@
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet32StaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1353,7 +1574,7 @@
 ENTRY art_quick_set64_static
     lw     $a1, 0($sp)                   # pass referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet64StaticFromCode
     jalr   $t9                           # (field_idx, referrer, new_val, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1366,7 +1587,7 @@
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
     lw     $a2, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSetObjStaticFromCode
     jalr   $t9                           # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
@@ -1379,7 +1600,7 @@
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet8InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1392,7 +1613,7 @@
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet16InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1405,7 +1626,7 @@
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSet32InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1419,7 +1640,7 @@
 ENTRY art_quick_set64_instance
     lw     $t1, 0($sp)                   # load referrer's Method*
                                          # 64 bit new_val is in a2:a3 pair
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     sw     rSELF, 20($sp)                # pass Thread::Current
     la     $t9, artSet64InstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
@@ -1433,7 +1654,7 @@
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
     lw     $a3, 0($sp)                   # pass referrer's Method*
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
     la     $t9, artSetObjInstanceFromCode
     jalr   $t9                           # (field_idx, Object*, new_val, referrer, Thread*)
     sw     rSELF, 16($sp)                # pass Thread::Current
@@ -1444,7 +1665,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a1, rSELF                # pass Thread::Current
@@ -1455,7 +1676,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a2, rSELF                # pass Thread::Current
@@ -1466,7 +1687,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     move    $a3, rSELF                # pass Thread::Current
@@ -1477,7 +1698,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
     la      $t9, \entrypoint
     jalr    $t9
     sw      rSELF, 16($sp)            # pass Thread::Current
@@ -1588,7 +1809,7 @@
 
   .Lart_quick_alloc_object_rosalloc_slow_path:
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     la    $t9, artAllocObjectFromCodeRosAlloc
     jalr  $t9
     move  $a2, $s1                                                # Pass self as argument.
@@ -1628,18 +1849,20 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bnez   $a0, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bnez   rSUSPEND, 1f
     addiu  rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
     nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME                      # save everything for stack crawl
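+    # Note (not stated in the patch, presumably the rationale): kSaveEverything keeps
+    # every GPR/FPR value available to the stack walk, so compiled code should not
+    # need to spill live registers around the suspend check.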
     la     $t9, artTestSuspendFromCode
-    jalr   $t9                                 # (Thread*)
+    jalr   $t9                                       # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
@@ -1648,13 +1871,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF                  # pass Thread::Current
     la      $t9, artQuickProxyInvokeHandler
     jalr    $t9                         # (Method* proxy method, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE     # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1          # move float value to return value
@@ -1705,26 +1928,26 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF                    # pass Thread::Current
     la      $t9, artQuickResolutionTrampoline
     jalr    $t9                           # (Method* called, receiver, Thread*, SP)
     addiu   $a3, $sp, ARG_SLOT_SIZE       # pass $sp (remove arg slots)
     beqz    $v0, 1f
     lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp to $s8
     move    $s3, $gp               # save $gp to $s3
 
@@ -1771,7 +1994,7 @@
     move    $sp, $s8               # tear down the alloca
 
     # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     MTD     $v0, $v1, $f0, $f1     # move float value to return value
     jalr    $zero, $ra
@@ -1785,13 +2008,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF                          # pass Thread::Current
     la      $t9, artQuickToInterpreterBridge
     jalr    $t9                                 # (Method* method, Thread*, SP)
     addiu   $a2, $sp, ARG_SLOT_SIZE             # pass $sp (remove arg slots)
     lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     bnez    $t0, 1f
     # don't care if $v0 and/or $v1 are modified, when exception branch taken
     MTD     $v0, $v1, $f0, $f1                  # move float value to return value
@@ -1807,7 +2030,7 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     sw       $a0, 28($sp)   # save arg0 in free arg slot
     move     $a3, $ra       # pass $ra
     la       $t9, artInstrumentationMethodEntryFromCode
@@ -1815,7 +2038,7 @@
     move     $a2, rSELF     # pass Thread::Current
     move     $t9, $v0       # $t9 holds reference to code
     lw       $a0, 28($sp)   # restore arg0 from free arg slot
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9            # call method
     nop
 END art_quick_instrumentation_entry
@@ -1827,7 +2050,7 @@
     .cpload  $t9
     move     $ra, $zero     # link register is to here, so clobber with 0 for later checks
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     addiu    $sp, $sp, -16  # allocate temp storage on the stack
     .cfi_adjust_cfa_offset 16
     sw       $v0, ARG_SLOT_SIZE+12($sp)
@@ -1848,8 +2071,8 @@
     lw       $v1, ARG_SLOT_SIZE+8($sp)
     l.d      $f0, ARG_SLOT_SIZE($sp)
     jalr     $zero, $t9     # return
-    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16  # restore stack
-    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+16)
+    addiu    $sp, $sp, ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16  # restore stack
+    .cfi_adjust_cfa_offset -(ARG_SLOT_SIZE+FRAME_SIZE_SAVE_REFS_ONLY+16)
 END art_quick_instrumentation_exit
 
     /*
@@ -1858,7 +2081,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la       $t9, artDeoptimize
     jalr     $t9            # artDeoptimize(Thread*)
                             # Returns caller method's frame size.
@@ -1871,7 +2094,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la       $t9, artDeoptimizeFromCompiledCode
     jalr     $t9                            # artDeoptimizeFromCompiledCode(Thread*)
                                             # Returns caller method's frame size.
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index f5d13c2..90e7b20 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -34,6 +34,12 @@
     (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
 static constexpr uint32_t kMipsCalleeSaveAllSpills =
     (1 << art::mips::S0) | (1 << art::mips::S1);
+static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
+    (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) |
+    (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) |
+    (1 << art::mips::T0) | (1 << art::mips::T1) | (1 << art::mips::T2) | (1 << art::mips::T3) |
+    (1 << art::mips::T4) | (1 << art::mips::T5) | (1 << art::mips::T6) | (1 << art::mips::T7) |
+    (1 << art::mips::S0) | (1 << art::mips::S1) | (1 << art::mips::T8) | (1 << art::mips::T9);
 
 static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0;
@@ -43,23 +49,34 @@
     (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
     (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
     (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills =
+    (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) |
+    (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) |
+    (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) |
+    (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) |
+    (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) |
+    (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
+    (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
+    (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
 
 constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) {
   return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(MipsCalleeSaveFPSpills(type))   /* fprs */ +
-                  1 /* Method* */) * kMipsPointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kMipsPointerSize), kStackAlignment);
 }
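A minimal usage sketch (illustrative only, not part of the patch; it assumes ART's include paths and the art::mips namespace nesting used by this header):

  #include "arch/mips/quick_method_frame_info_mips.h"

  // One pointer-size slot per bit set in the core and FP spill masks, plus the
  // ArtMethod* slot, rounded up to the 16-byte stack alignment.
  constexpr size_t kSaveEverythingFrameBytes =
      art::mips::MipsCalleeSaveFrameSize(art::Runtime::kSaveEverything);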
 
 constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index a451496..0a9ab7a 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -17,14 +17,15 @@
 #include "thread.h"
 
 #include "asm_support_mips.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
 }
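For context, the change relies on base/enums.h providing a scoped pointer-size type (an assumption here, sketched below); the computed offsets themselves are unchanged:

  // Presumed definition in base/enums.h:
  //   enum class PointerSize : size_t { k32 = 4, k64 = 8 };
  // ThreadFlagsOffset<PointerSize::k32>() is then the type-safe spelling of the
  // old ThreadFlagsOffset<4>().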
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h
index 995fcf3..9063d20 100644
--- a/runtime/arch/mips64/asm_support_mips64.h
+++ b/runtime/arch/mips64/asm_support_mips64.h
@@ -20,10 +20,12 @@
 #include "asm_support.h"
 
 // 64 ($f24-$f31) + 64 ($s0-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 160
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 160
 // 48 ($s2-$s7) + 8 ($gp) + 8 ($s8) + 8 ($ra) + 1x8 bytes padding
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 80
+#define FRAME_SIZE_SAVE_REFS_ONLY 80
 // $f12-$f19, $a1-$a7, $s2-$s7 + $gp + $s8 + $ra, 16 total + 1x8 bytes padding + method*
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 208
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 208
+// $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method*
+#define FRAME_SIZE_SAVE_EVERYTHING 496
 
 #endif  // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_
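A quick arithmetic check of the new constant (illustrative only; assumes this header is in scope):

  // 28 GPRs + 32 FPRs at 8 bytes each, plus 8 bytes of padding and the ArtMethod* slot.
  static_assert((28 + 32) * 8 + 8 /* padding */ + 8 /* ArtMethod* */ == FRAME_SIZE_SAVE_EVERYTHING,
                "FRAME_SIZE_SAVE_EVERYTHING should cover 28 GPRs, 32 FPRs, padding and the method slot");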
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 030c127..34b0638 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -28,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -97,7 +97,38 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS64.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 4abfcf1..0bbb6e1 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips64 specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips64::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips64));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips64::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,11 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
+
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +83,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
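The named indices replace the raw sc_regs[] numbers used previously; the presumed mapping (standard MIPS numbering, declared in registers_mips64.h) is:

  // mips64::A0 == 4, mips64::T9 == 25, mips64::SP == 29, mips64::RA == 31,
  // so sc->sc_regs[mips64::SP] reads the same slot the old sc_regs[29] did.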
@@ -111,7 +117,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips64::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +140,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->sc_pc.
   return true;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 8f1a35a..9774eb9 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -41,16 +41,16 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     daddiu $sp, $sp, -160
     .cfi_adjust_cfa_offset 160
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 160)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 160)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 152($sp)
@@ -89,25 +89,25 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes
      * non-moving GC.
      * Does not include rSUSPEND or rSELF
      * callee-save: padding + $s2-$s7 + $gp + $ra + $s8 = 9 total + 1x8 bytes padding
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro SETUP_SAVE_REFS_ONLY_FRAME
     daddiu $sp, $sp, -80
     .cfi_adjust_cfa_offset 80
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 80)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 80)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 72($sp)
@@ -131,12 +131,12 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -160,7 +160,7 @@
     .cpreturn
 .endm
 
-.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
     ld     $ra, 72($sp)
     .cfi_restore 31
     ld     $s8, 64($sp)
@@ -186,15 +186,15 @@
 .endm
 
 // This assumes the top part of these stack frame types are identical.
-#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+#define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 208)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(MIPS64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 208)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS64) size not as expected."
 #endif
 
     sd     $ra, 200($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_LrOffset
@@ -244,27 +244,27 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes
      * non-moving GC.
      * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1x8 bytes padding + Method*
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    ld      $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1)
+    ld      $t1, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     sd      $a0, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
-.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
     ld     $ra, 200($sp)
     .cfi_restore 31
     ld     $s8, 192($sp)
@@ -314,13 +314,235 @@
 .endm
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra,
+     *              $f0-$f31; 28 (GPR) + 32 (FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -496
+    .cfi_adjust_cfa_offset 496
+
+     // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 496)
+#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
+#endif
+
+    // Save core registers.
+    sd     $ra, 488($sp)
+    .cfi_rel_offset 31, 488
+    sd     $s8, 480($sp)
+    .cfi_rel_offset 30, 480
+    sd     $t9, 464($sp)
+    .cfi_rel_offset 25, 464
+    sd     $t8, 456($sp)
+    .cfi_rel_offset 24, 456
+    sd     $s7, 448($sp)
+    .cfi_rel_offset 23, 448
+    sd     $s6, 440($sp)
+    .cfi_rel_offset 22, 440
+    sd     $s5, 432($sp)
+    .cfi_rel_offset 21, 432
+    sd     $s4, 424($sp)
+    .cfi_rel_offset 20, 424
+    sd     $s3,  416($sp)
+    .cfi_rel_offset 19, 416
+    sd     $s2,  408($sp)
+    .cfi_rel_offset 18, 408
+    sd     $s1,  400($sp)
+    .cfi_rel_offset 17, 400
+    sd     $s0,  392($sp)
+    .cfi_rel_offset 16, 392
+    sd     $t3,  384($sp)
+    .cfi_rel_offset 15, 384
+    sd     $t2,  376($sp)
+    .cfi_rel_offset 14, 376
+    sd     $t1,  368($sp)
+    .cfi_rel_offset 13, 368
+    sd     $t0,  360($sp)
+    .cfi_rel_offset 12, 360
+    sd     $a7, 352($sp)
+    .cfi_rel_offset 11, 352
+    sd     $a6, 344($sp)
+    .cfi_rel_offset 10, 344
+    sd     $a5, 336($sp)
+    .cfi_rel_offset 9, 336
+    sd     $a4, 328($sp)
+    .cfi_rel_offset 8, 328
+    sd     $a3,  320($sp)
+    .cfi_rel_offset 7, 320
+    sd     $a2,  312($sp)
+    .cfi_rel_offset 6, 312
+    sd     $a1,  304($sp)
+    .cfi_rel_offset 5, 304
+    sd     $a0,  296($sp)
+    .cfi_rel_offset 4, 296
+    sd     $v1,  288($sp)
+    .cfi_rel_offset 3, 288
+    sd     $v0,  280($sp)
+    .cfi_rel_offset 2, 280
+
+    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+    bal 1f
+    .set push
+    .set noat
+    sd     $at,  272($sp)
+    .cfi_rel_offset 1, 272
+    .set pop
+1:
+    .cpsetup $ra, 472, 1b
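+    // Note: .cpsetup stores the caller's $gp at 472($sp) and derives the new $gp from
+    // $ra (which holds the address of label 1b); RESTORE_SAVE_EVERYTHING_FRAME undoes
+    // this with .cpreturn.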
+
+    // Save FP registers.
+    s.d    $f31, 264($sp)
+    s.d    $f30, 256($sp)
+    s.d    $f29, 248($sp)
+    s.d    $f28, 240($sp)
+    s.d    $f27, 232($sp)
+    s.d    $f26, 224($sp)
+    s.d    $f25, 216($sp)
+    s.d    $f24, 208($sp)
+    s.d    $f23, 200($sp)
+    s.d    $f22, 192($sp)
+    s.d    $f21, 184($sp)
+    s.d    $f20, 176($sp)
+    s.d    $f19, 168($sp)
+    s.d    $f18, 160($sp)
+    s.d    $f17, 152($sp)
+    s.d    $f16, 144($sp)
+    s.d    $f15, 136($sp)
+    s.d    $f14, 128($sp)
+    s.d    $f13, 120($sp)
+    s.d    $f12, 112($sp)
+    s.d    $f11, 104($sp)
+    s.d    $f10, 96($sp)
+    s.d    $f9, 88($sp)
+    s.d    $f8, 80($sp)
+    s.d    $f7, 72($sp)
+    s.d    $f6, 64($sp)
+    s.d    $f5, 56($sp)
+    s.d    $f4, 48($sp)
+    s.d    $f3, 40($sp)
+    s.d    $f2, 32($sp)
+    s.d    $f1, 24($sp)
+    s.d    $f0, 16($sp)
+
+    # load appropriate callee-save-method
+    ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
+    ld      $t1, 0($t1)
+    ld      $t1, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET($t1)
+    sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
+    # Place sp in Thread::Current()->top_quick_frame.
+    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+.endm
+
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    // Restore FP registers.
+    l.d    $f31, 264($sp)
+    l.d    $f30, 256($sp)
+    l.d    $f29, 248($sp)
+    l.d    $f28, 240($sp)
+    l.d    $f27, 232($sp)
+    l.d    $f26, 224($sp)
+    l.d    $f25, 216($sp)
+    l.d    $f24, 208($sp)
+    l.d    $f23, 200($sp)
+    l.d    $f22, 192($sp)
+    l.d    $f21, 184($sp)
+    l.d    $f20, 176($sp)
+    l.d    $f19, 168($sp)
+    l.d    $f18, 160($sp)
+    l.d    $f17, 152($sp)
+    l.d    $f16, 144($sp)
+    l.d    $f15, 136($sp)
+    l.d    $f14, 128($sp)
+    l.d    $f13, 120($sp)
+    l.d    $f12, 112($sp)
+    l.d    $f11, 104($sp)
+    l.d    $f10, 96($sp)
+    l.d    $f9, 88($sp)
+    l.d    $f8, 80($sp)
+    l.d    $f7, 72($sp)
+    l.d    $f6, 64($sp)
+    l.d    $f5, 56($sp)
+    l.d    $f4, 48($sp)
+    l.d    $f3, 40($sp)
+    l.d    $f2, 32($sp)
+    l.d    $f1, 24($sp)
+    l.d    $f0, 16($sp)
+
+    // Restore core registers.
+    .cpreturn
+    ld     $ra, 488($sp)
+    .cfi_restore 31
+    ld     $s8, 480($sp)
+    .cfi_restore 30
+    ld     $t9, 464($sp)
+    .cfi_restore 25
+    ld     $t8, 456($sp)
+    .cfi_restore 24
+    ld     $s7, 448($sp)
+    .cfi_restore 23
+    ld     $s6, 440($sp)
+    .cfi_restore 22
+    ld     $s5, 432($sp)
+    .cfi_restore 21
+    ld     $s4, 424($sp)
+    .cfi_restore 20
+    ld     $s3,  416($sp)
+    .cfi_restore 19
+    ld     $s2,  408($sp)
+    .cfi_restore 18
+    ld     $s1,  400($sp)
+    .cfi_restore 17
+    ld     $s0,  392($sp)
+    .cfi_restore 16
+    ld     $t3,  384($sp)
+    .cfi_restore 15
+    ld     $t2,  376($sp)
+    .cfi_restore 14
+    ld     $t1,  368($sp)
+    .cfi_restore 13
+    ld     $t0,  360($sp)
+    .cfi_restore 12
+    ld     $a7, 352($sp)
+    .cfi_restore 11
+    ld     $a6, 344($sp)
+    .cfi_restore 10
+    ld     $a5, 336($sp)
+    .cfi_restore 9
+    ld     $a4, 328($sp)
+    .cfi_restore 8
+    ld     $a3,  320($sp)
+    .cfi_restore 7
+    ld     $a2,  312($sp)
+    .cfi_restore 6
+    ld     $a1,  304($sp)
+    .cfi_restore 5
+    ld     $a0,  296($sp)
+    .cfi_restore 4
+    ld     $v1,  288($sp)
+    .cfi_restore 3
+    ld     $v0,  280($sp)
+    .cfi_restore 2
+    .set push
+    .set noat
+    ld     $at,  272($sp)
+    .cfi_restore 1
+    .set pop
+
+    daddiu $sp, $sp, 496
+    .cfi_adjust_cfa_offset -496
+.endm
+
+    /*
      * Macro that calls through to artDeliverPendingExceptionFromCode,
      * where the pending
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
     dla     $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
     move    $a0, rSELF                   # pass Thread::Current
@@ -328,7 +550,7 @@
 
 .macro RETURN_IF_NO_EXCEPTION
     ld     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $t0, $zero, 1f                      # success if no exception is pending
     nop
     jalr   $zero, $ra
@@ -338,7 +560,7 @@
 .endm
 
 .macro RETURN_IF_ZERO
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -348,7 +570,7 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     beq    $v0, $zero, 1f                # success?
     nop
     jalr   $zero, $ra                    # return on success
@@ -574,7 +796,7 @@
      * the bottom of the thread. On entry a0 holds Throwable*
      */
 ENTRY art_quick_deliver_exception
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artDeliverExceptionFromCode
     jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -586,18 +808,29 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
 .Lart_quick_throw_null_pointer_exception_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNullPointerExceptionFromCode
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    dla  $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
 ENTRY art_quick_throw_div_zero
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowDivZeroFromCode
     jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -610,18 +843,31 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
 .Lart_quick_throw_array_bounds_gp_set:
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowArrayBoundsFromCode
     jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+.Lart_quick_throw_string_bounds_gp_set:
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    dla  $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
 ENTRY art_quick_throw_stack_overflow
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowStackOverflowFromCode
     jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
     move $a0, rSELF                 # pass Thread::Current
@@ -632,7 +878,7 @@
      */
     .extern artThrowNoSuchMethodFromCode
 ENTRY art_quick_throw_no_such_method
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowNoSuchMethodFromCode
     jalr $zero, $t9                 # artThrowNoSuchMethodFromCode(method_idx, Thread*)
     move $a1, rSELF                 # pass Thread::Current
@@ -656,13 +902,13 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  # save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME         # save callee saves in case allocation triggers GC
     move  $a2, rSELF                       # pass Thread::Current
     jal   \cxx_name                        # (method_idx, this, Thread*, $sp)
     move  $a3, $sp                         # pass $sp
     move  $a0, $v0                         # save target Method*
     move  $t9, $v1                         # save $v0->code_
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     beq   $v0, $zero, 1f
     nop
     jalr  $zero, $t9
@@ -951,8 +1197,8 @@
      */
     .extern artHandleFillArrayDataFromCode
 ENTRY art_quick_handle_fill_data
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    ld      $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
+    ld      $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)         # pass referrer's Method*
     jal     artHandleFillArrayDataFromCode              # (payload offset, Array*, method, Thread*)
     move    $a3, rSELF                                  # pass Thread::Current
     RETURN_IF_ZERO
@@ -965,7 +1211,7 @@
 ENTRY art_quick_lock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -974,7 +1220,7 @@
 ENTRY art_quick_lock_object_no_inline
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
     move    $a1, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -987,7 +1233,7 @@
 ENTRY art_quick_unlock_object
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
     move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
@@ -996,7 +1242,7 @@
 ENTRY art_quick_unlock_object_no_inline
     beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
     nop
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
     move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
@@ -1029,7 +1275,7 @@
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_GP
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
@@ -1177,7 +1423,7 @@
     SETUP_GP
     bne    $v0, $zero, .Ldo_aput
     nop
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     move   $a1, $a2
     dla  $t9, artThrowArrayStoreException
     jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
@@ -1189,8 +1435,8 @@
      */
     .extern artGetBooleanStaticFromCode
 ENTRY art_quick_get_boolean_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanStaticFromCode   # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1201,8 +1447,8 @@
      */
     .extern artGetByteStaticFromCode
 ENTRY art_quick_get_byte_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1213,8 +1459,8 @@
      */
     .extern artGetCharStaticFromCode
 ENTRY art_quick_get_char_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharStaticFromCode      # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1225,8 +1471,8 @@
      */
     .extern artGetShortStaticFromCode
 ENTRY art_quick_get_short_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortStaticFromCode     # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1237,8 +1483,8 @@
      */
     .extern artGet32StaticFromCode
 ENTRY art_quick_get32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1249,8 +1495,8 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1261,8 +1507,8 @@
      */
     .extern artGetObjStaticFromCode
 ENTRY art_quick_get_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjStaticFromCode       # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1273,8 +1519,8 @@
      */
     .extern artGetBooleanInstanceFromCode
 ENTRY art_quick_get_boolean_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetBooleanInstanceFromCode # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1285,8 +1531,8 @@
      */
     .extern artGetByteInstanceFromCode
 ENTRY art_quick_get_byte_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetByteInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1297,8 +1543,8 @@
      */
     .extern artGetCharInstanceFromCode
 ENTRY art_quick_get_char_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetCharInstanceFromCode    # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1309,8 +1555,8 @@
      */
     .extern artGetShortInstanceFromCode
 ENTRY art_quick_get_short_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetShortInstanceFromCode   # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1321,8 +1567,8 @@
      */
     .extern artGet32InstanceFromCode
 ENTRY art_quick_get32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1333,8 +1579,8 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet64InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1345,8 +1591,8 @@
      */
     .extern artGetObjInstanceFromCode
 ENTRY art_quick_get_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGetObjInstanceFromCode     # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_NO_EXCEPTION
@@ -1357,8 +1603,8 @@
      */
     .extern artSet8StaticFromCode
 ENTRY art_quick_set8_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8StaticFromCode         # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1369,8 +1615,8 @@
      */
     .extern artSet16StaticFromCode
 ENTRY art_quick_set16_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1381,8 +1627,8 @@
      */
     .extern artSet32StaticFromCode
 ENTRY art_quick_set32_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32StaticFromCode        # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1393,9 +1639,9 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
                                          # a2 contains the new val
-    ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1406,8 +1652,8 @@
      */
     .extern artSetObjStaticFromCode
 ENTRY art_quick_set_obj_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a2, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjStaticFromCode       # (field_idx, new_val, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1418,8 +1664,8 @@
      */
     .extern artSet8InstanceFromCode
 ENTRY art_quick_set8_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet8InstanceFromCode       # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1430,8 +1676,8 @@
      */
     .extern artSet16InstanceFromCode
 ENTRY art_quick_set16_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet16InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1442,8 +1688,8 @@
      */
     .extern artSet32InstanceFromCode
 ENTRY art_quick_set32_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet32InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1454,8 +1700,8 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSet64InstanceFromCode      # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1466,8 +1712,8 @@
      */
     .extern artSetObjInstanceFromCode
 ENTRY art_quick_set_obj_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    ld     $a3, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
+    SETUP_SAVE_REFS_ONLY_FRAME           # save callee saves in case of GC
+    ld     $a3, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artSetObjInstanceFromCode     # (field_idx, Object*, new_val, referrer, Thread*)
     move   $a4, rSELF                    # pass Thread::Current
     RETURN_IF_ZERO
@@ -1477,7 +1723,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a1, rSELF                 # pass Thread::Current
     \return
@@ -1488,7 +1734,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a2, rSELF                 # pass Thread::Current
     \return
@@ -1498,7 +1744,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a3, rSELF                 # pass Thread::Current
     \return
@@ -1508,7 +1754,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case of GC
     jal     \entrypoint
     move    $a4, rSELF                 # pass Thread::Current
     \return
@@ -1610,7 +1856,7 @@
     .cpreturn                                    # Restore gp from t8 in branch delay slot.
 
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     jal    artAllocObjectFromCodeRosAlloc
     move   $a2, $s1                              # Pass self as argument.
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1649,17 +1895,19 @@
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
-ENTRY art_quick_test_suspend
-    lh     $a0, THREAD_FLAGS_OFFSET(rSELF)
-    bne    $a0, $zero, 1f
+ENTRY_NO_GP art_quick_test_suspend
+    lh     rSUSPEND, THREAD_FLAGS_OFFSET(rSELF)
+    bne    rSUSPEND, $zero, 1f
     daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
-    .cpreturn                                 # Restore gp from t8 in branch delay slot.
+    nop
 1:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME         # save callee saves for stack crawl
+    SETUP_SAVE_EVERYTHING_FRAME               # save everything for stack crawl
     jal    artTestSuspendFromCode             # (Thread*)
     move   $a0, rSELF
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_SAVE_EVERYTHING_FRAME
+    jalr   $zero, $ra
+    nop
 END art_quick_test_suspend
 
     /*
@@ -1668,13 +1916,13 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1723,26 +1971,26 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickResolutionTrampoline  # (Method* called, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
     beq     $v0, $zero, 1f
     ld      $a0, 0($sp)            # load resolved method in $a0
                                    # artQuickResolutionTrampoline puts resolved method in *SP
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
     jalr    $zero, $t9             # tail call to method
     nop
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
@@ -1792,7 +2040,7 @@
     move    $sp, $s8               # tear down the alloca
 
    # tear down the callee-save frame
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jalr    $zero, $ra
     dmtc1   $v0, $f0               # place return value to FP return value
@@ -1805,13 +2053,13 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     move    $a1, rSELF             # pass Thread::Current
     jal     artQuickToInterpreterBridge    # (Method* method, Thread*, SP)
     move    $a2, $sp               # pass $sp
     ld      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
     daddiu  $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE  # skip a0-a7 and f12-f19
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     bne     $t0, $zero, 1f
     dmtc1   $v0, $f0               # place return value to FP return value
     jalr    $zero, $ra
@@ -1826,7 +2074,7 @@
     .extern artInstrumentationMethodEntryFromCode
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     daddiu   $sp, $sp, -16     # space for saving arg0
     .cfi_adjust_cfa_offset 16
     sd       $a0, 0($sp)       # save arg0
@@ -1837,7 +2085,7 @@
     ld       $a0, 0($sp)       # restore arg0
     daddiu   $sp, $sp, 16      # remove args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     jalr     $t9               # call method
     nop
 END art_quick_instrumentation_entry
@@ -1847,7 +2095,7 @@
     .cfi_startproc
     SETUP_GP
    move     $ra, $zero        # link register points here, so clobber it with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     move     $t0, $sp          # remember bottom of caller's frame
     daddiu   $sp, $sp, -16     # save return values and set up args
     .cfi_adjust_cfa_offset 16
@@ -1867,8 +2115,9 @@
     ld       $v0, 0($sp)       # restore return values
     l.d      $f0, 8($sp)
     jalr     $zero, $t9        # return
-    daddiu   $sp, $sp, 16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE  # 16 bytes of saved values + ref_only callee save frame
-    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    # restore stack, 16 bytes of saved values + ref_only callee save frame
+    daddiu   $sp, $sp, 16+FRAME_SIZE_SAVE_REFS_ONLY
+    .cfi_adjust_cfa_offset -(16+FRAME_SIZE_SAVE_REFS_ONLY)
 END art_quick_instrumentation_exit
 
     /*
@@ -1878,7 +2127,7 @@
     .extern artDeoptimize
     .extern artEnterInterpreterFromDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimize     # artDeoptimize(Thread*, SP)
                                # Returns caller method's frame size.
    move     $a0, rSELF        # pass Thread::Current
@@ -1890,7 +2139,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*, SP)
                                               # Returns caller method's frame size.
    move     $a0, rSELF                       # pass Thread::Current
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index f967be0..397776e 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -25,6 +25,8 @@
 namespace art {
 namespace mips64 {
 
+static constexpr uint32_t kMips64CalleeSaveAlwaysSpills =
+    (1 << art::mips64::RA);
 static constexpr uint32_t kMips64CalleeSaveRefSpills =
     (1 << art::mips64::S2) | (1 << art::mips64::S3) | (1 << art::mips64::S4) |
     (1 << art::mips64::S5) | (1 << art::mips64::S6) | (1 << art::mips64::S7) |
@@ -35,6 +37,14 @@
     (1 << art::mips64::A7);
 static constexpr uint32_t kMips64CalleeSaveAllSpills =
     (1 << art::mips64::S0) | (1 << art::mips64::S1);
+static constexpr uint32_t kMips64CalleeSaveEverythingSpills =
+    (1 << art::mips64::AT) | (1 << art::mips64::V0) | (1 << art::mips64::V1) |
+    (1 << art::mips64::A0) | (1 << art::mips64::A1) | (1 << art::mips64::A2) |
+    (1 << art::mips64::A3) | (1 << art::mips64::A4) | (1 << art::mips64::A5) |
+    (1 << art::mips64::A6) | (1 << art::mips64::A7) | (1 << art::mips64::T0) |
+    (1 << art::mips64::T1) | (1 << art::mips64::T2) | (1 << art::mips64::T3) |
+    (1 << art::mips64::S0) | (1 << art::mips64::S1) | (1 << art::mips64::T8) |
+    (1 << art::mips64::T9);
 
 static constexpr uint32_t kMips64CalleeSaveFpRefSpills = 0;
 static constexpr uint32_t kMips64CalleeSaveFpArgSpills =
@@ -46,23 +56,37 @@
     (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
     (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
     (1 << art::mips64::F30) | (1 << art::mips64::F31);
+static constexpr uint32_t kMips64CalleeSaveFpEverythingSpills =
+    (1 << art::mips64::F0) | (1 << art::mips64::F1) | (1 << art::mips64::F2) |
+    (1 << art::mips64::F3) | (1 << art::mips64::F4) | (1 << art::mips64::F5) |
+    (1 << art::mips64::F6) | (1 << art::mips64::F7) | (1 << art::mips64::F8) |
+    (1 << art::mips64::F9) | (1 << art::mips64::F10) | (1 << art::mips64::F11) |
+    (1 << art::mips64::F12) | (1 << art::mips64::F13) | (1 << art::mips64::F14) |
+    (1 << art::mips64::F15) | (1 << art::mips64::F16) | (1 << art::mips64::F17) |
+    (1 << art::mips64::F18) | (1 << art::mips64::F19) | (1 << art::mips64::F20) |
+    (1 << art::mips64::F21) | (1 << art::mips64::F22) | (1 << art::mips64::F23) |
+    (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) |
+    (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
+    (1 << art::mips64::F30) | (1 << art::mips64::F31);
 
 constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kMips64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | (1 << art::mips64::RA);
+  return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kMips64CalleeSaveFpRefSpills |
-      (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
+      (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(Mips64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Mips64CalleeSaveFpSpills(type))   /* fprs */ +
-                  + 1 /* Method* */) * kMips64PointerSize, kStackAlignment);
+                  + 1 /* Method* */) * static_cast<size_t>(kMips64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo Mips64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
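(Aside: the frame-size formula above is just a POPCOUNT over the two spill masks plus one slot for the ArtMethod*, rounded up to the stack alignment. A minimal standalone re-computation for kSaveEverything is sketched below; it assumes 8-byte pointers, 16-byte stack alignment, and 28 set bits in the combined core masks — the FP mask above clearly spills all 32 of F0..F31 — so treat the resulting 496 as illustrative rather than authoritative.)

// Illustrative re-computation of Mips64CalleeSaveFrameSize(kSaveEverything).
// Assumed, not taken from this patch: 8-byte pointers, 16-byte alignment, 28 core spills.
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }
constexpr size_t PopCount(uint64_t x) { return x == 0 ? 0 : (x & 1) + PopCount(x >> 1); }

int main() {
  constexpr uint32_t fp_everything = 0xffffffffu;  // F0..F31, as in the mask above
  constexpr size_t core_count = 28;                // assumption, see note above
  constexpr size_t frame =
      RoundUp((core_count + PopCount(fp_everything) + 1 /* ArtMethod* slot */) * 8, 16);
  std::printf("save-everything frame: %zu bytes\n", frame);  // 496 under these assumptions
  return 0;
}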
diff --git a/runtime/arch/mips64/thread_mips64.cc b/runtime/arch/mips64/thread_mips64.cc
index c55537c..3ce5e50 100644
--- a/runtime/arch/mips64/thread_mips64.cc
+++ b/runtime/arch/mips64/thread_mips64.cc
@@ -17,14 +17,15 @@
 #include "thread.h"
 
 #include "asm_support_mips64.h"
+#include "base/enums.h"
 #include "base/logging.h"
 
 namespace art {
 
 void Thread::InitCpu() {
-  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<8>().Int32Value());
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<8>().Int32Value());
+  CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k64>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 290769b..fa86bf4 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -87,6 +87,27 @@
   ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -219,20 +240,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 3cdff55..80bb51d 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -529,11 +530,7 @@
 
   static uintptr_t GetEntrypoint(Thread* self, QuickEntrypointEnum entrypoint) {
     int32_t offset;
-#ifdef __LP64__
-    offset = GetThreadOffset<8>(entrypoint).Int32Value();
-#else
-    offset = GetThreadOffset<4>(entrypoint).Int32Value();
-#endif
+    offset = GetThreadOffset<kRuntimePointerSize>(entrypoint).Int32Value();
     return *reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(self) + offset);
   }
 
@@ -1016,7 +1013,7 @@
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             // arbitrary
-                            reinterpret_cast<size_t>(c->GetVirtualMethod(0, sizeof(void*))),
+                            reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)),
                             0U,
                             StubTest::GetEntrypoint(self, kQuickAllocObject),
                             self);
@@ -1147,12 +1144,13 @@
 
   if ((false)) {
     // Use an arbitrary method from c to use as referrer
-    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
-                            10U,
-                            // arbitrary
-                            reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0, sizeof(void*))),
-                            StubTest::GetEntrypoint(self, kQuickAllocArray),
-                            self);
+    size_t result = Invoke3(
+        static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+        10U,
+        // arbitrary
+        reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0, kRuntimePointerSize)),
+        StubTest::GetEntrypoint(self, kQuickAllocArray),
+        self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
@@ -1205,8 +1203,9 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
-    defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
+  // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1798,7 +1797,7 @@
   Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(o)));
   Handle<mirror::Class> c(hs.NewHandle(obj->GetClass()));
   // Need a method as a referrer
-  ArtMethod* m = c->GetDirectMethod(0, sizeof(void*));
+  ArtMethod* m = c->GetDirectMethod(0, kRuntimePointerSize);
 
   // Play with it...
 
@@ -2014,10 +2013,10 @@
       Runtime::Current()->GetClassLinker()->CreateImtConflictTable(/*count*/0u, linear_alloc);
   void* data = linear_alloc->Alloc(
       self,
-      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, sizeof(void*)));
+      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, kRuntimePointerSize));
   ImtConflictTable* new_table = new (data) ImtConflictTable(
-      empty_conflict_table, inf_contains, contains_amethod, sizeof(void*));
-  conflict_method->SetImtConflictTable(new_table, sizeof(void*));
+      empty_conflict_table, inf_contains, contains_amethod, kRuntimePointerSize);
+  conflict_method->SetImtConflictTable(new_table, kRuntimePointerSize);
 
   size_t result =
       Invoke3WithReferrerAndHidden(reinterpret_cast<size_t>(conflict_method),
@@ -2152,6 +2151,8 @@
 #endif
 }
 
+// TODO: Exercise the ReadBarrierMarkRegX entry points.
+
 TEST_F(StubTest, ReadBarrier) {
 #if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
       defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index b0a6017..2bba08d 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -19,10 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-
-// 32 bytes for GPRs and 32 bytes for FPRs.
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32)
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 32
+#define FRAME_SIZE_SAVE_REFS_ONLY 32
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (32 + 32)
+#define FRAME_SIZE_SAVE_EVERYTHING (48 + 64)
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
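(A quick sanity check on the new save-everything constant: the frame laid out later in quick_entrypoints_x86.S pushes 7 core registers, stores 8 XMM registers, inserts 12 bytes of alignment padding, pushes a 4-byte method slot, and sits above the caller's 4-byte return address. The two-line sketch below merely restates that accounting; the individual sizes are taken from the #if check in the .S file rather than from this header.)

// Byte accounting for FRAME_SIZE_SAVE_EVERYTHING on x86 (illustrative only).
static_assert(7 * 4 + 8 * 8 + 12 + 4 + 4 == 48 + 64,
              "7 GPRs + 8 XMMs + 12 padding + method slot + return address == 112");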
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 15a8571..bdf11da 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -25,11 +25,21 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -76,7 +86,37 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  // x86 has only 8 core registers.
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
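(The per-register mark entry points follow the shape described in the comment above: one symbol per core register, with nullptr for a register that can never carry the reference (ESP) or that does not exist on x86. The standalone model below is not ART code — the function is a stand-in — it only illustrates why the table has 30 slots and why most of them stay null on a 32-bit target.)

// Standalone model of the per-register read-barrier-mark table (illustrative only).
#include <array>
#include <cstdio>

using MarkFn = void* (*)(void*);

static void* MarkEaxStandIn(void* ref) { return ref; }  // stand-in for the real reg00 stub

int main() {
  std::array<MarkFn, 30> mark{};   // Reg00..Reg29, value-initialized to nullptr
  mark[0] = MarkEaxStandIn;        // EAX is usable
  // mark[4] stays nullptr: ESP cannot hold the reference argument.
  // mark[8] .. mark[29] stay nullptr: x86 has only 8 core registers.
  std::printf("reg00 usable=%d reg04 usable=%d reg08 usable=%d\n",
              mark[0] != nullptr, mark[4] != nullptr, mark[8] != nullptr);
  return 0;
}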
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index d7c4cb1..3efeb40 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -20,6 +20,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
@@ -36,6 +37,7 @@
 #define CTX_EIP uc_mcontext->__ss.__rip
 #define CTX_EAX uc_mcontext->__ss.__rax
 #define CTX_METHOD uc_mcontext->__ss.__rdi
+#define CTX_RDI uc_mcontext->__ss.__rdi
 #define CTX_JMP_BUF uc_mcontext->__ss.__rdi
 #else
 // 32 bit mac build.
@@ -69,18 +71,9 @@
 
 namespace art {
 
-#if defined(__APPLE__) && defined(__x86_64__)
-// mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception();
-extern "C" void _art_quick_throw_stack_overflow();
-extern "C" void _art_quick_test_suspend();
-#define EXT_SYM(sym) _ ## sym
-#else
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
-#define EXT_SYM(sym) sym
-#endif
 
 // Note this is different from the others (no underscore on 64 bit mac) due to
 // the way the symbol is defined in the .S file.
@@ -292,7 +285,10 @@
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
-bool NullPointerHandler::Action(int, siginfo_t*, void* context) {
+bool NullPointerHandler::Action(int, siginfo_t* sig, void* context) {
+  if (!IsValidImplicitCheck(sig)) {
+    return false;
+  }
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
@@ -314,7 +310,15 @@
   *next_sp = retaddr;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_null_pointer_exception));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(
+      art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+#if defined(__x86_64__)
+  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
+#else
+  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
+#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -335,11 +339,7 @@
 bool SuspensionHandler::Action(int, siginfo_t*, void* context) {
   // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
   // where xxx is the offset of the suspend trigger.
-#if defined(__x86_64__)
-  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<8>().Int32Value();
-#else
-  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
-#endif
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<kRuntimePointerSize>().Int32Value();
 
   VLOG(signals) << "Checking for suspension point";
 #if defined(__x86_64__)
@@ -388,7 +388,7 @@
     *next_sp = retaddr;
     uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-    uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_test_suspend));
+    uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
@@ -434,7 +434,7 @@
   // the previous frame.
 
   // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_stack_overflow));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
 
   return true;
 }
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index b97a8db..0093e82 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,11 +45,6 @@
     "silvermont",
 };
 
-static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
-    "atom",
-    "silvermont",
-};
-
 static constexpr const char* x86_variants_with_popcnt[] = {
     "silvermont",
 };
@@ -69,10 +64,6 @@
   bool has_AVX = false;
   bool has_AVX2 = false;
 
-  bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
-                                               arraysize(x86_variants_prefer_locked_add_sync),
-                                               variant);
-
   bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
                                        arraysize(x86_variants_with_popcnt),
                                        variant);
@@ -86,10 +77,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   }
 }
 
@@ -101,16 +92,13 @@
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
-  bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
   bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                            has_AVX, has_AVX2, prefers_locked_add,
-                                            has_POPCNT);
+                                            has_AVX, has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                         has_AVX, has_AVX2, prefers_locked_add,
-                                         has_POPCNT);
+                                         has_AVX, has_AVX2, has_POPCNT);
   }
 }
 
@@ -147,9 +135,6 @@
   const bool has_AVX2 = true;
 #endif
 
-  // No #define for memory synchronization preference.
-  const bool prefers_locked_add = false;
-
 #ifndef __POPCNT__
   const bool has_POPCNT = false;
 #else
@@ -158,10 +143,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
@@ -174,8 +159,6 @@
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
-  // No cpuinfo for memory synchronization preference.
-  const bool prefers_locked_add = false;
   bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
@@ -217,10 +200,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
@@ -245,7 +228,6 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
       (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
@@ -256,7 +238,6 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
       (has_POPCNT_ ? kPopCntBitfield : 0);
 }
 
@@ -292,11 +273,6 @@
   } else {
     result += ",-avx2";
   }
-  if (prefers_locked_add_) {
-    result += ",lock_add";
-  } else {
-    result += ",-lock_add";
-  }
   if (has_POPCNT_) {
     result += ",popcnt";
   } else {
@@ -313,7 +289,6 @@
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
-  bool prefers_locked_add = prefers_locked_add_;
   bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
@@ -337,10 +312,6 @@
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
-    } else if (feature == "lock_add") {
-      prefers_locked_add = true;
-    } else if (feature == "-lock_add") {
-      prefers_locked_add = false;
     } else if (feature == "popcnt") {
       has_POPCNT = true;
     } else if (feature == "-popcnt") {
@@ -352,10 +323,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 1819654..2aa8ae6 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,8 +60,6 @@
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
-  bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
-
   bool HasPopCnt() const { return has_POPCNT_; }
 
  protected:
@@ -77,16 +75,13 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2,
-                            bool prefers_locked_add,
-                            bool has_POPCNT)
+                            bool has_AVX, bool has_AVX2, bool has_POPCNT)
       : InstructionSetFeatures(smp),
         has_SSSE3_(has_SSSE3),
         has_SSE4_1_(has_SSE4_1),
         has_SSE4_2_(has_SSE4_2),
         has_AVX_(has_AVX),
         has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add),
         has_POPCNT_(has_POPCNT) {
   }
 
@@ -99,8 +94,7 @@
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
-    kPrefersLockedAdd = 64,
-    kPopCntBitfield = 128,
+    kPopCntBitfield = 64,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -108,7 +102,6 @@
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
-  const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
   const bool has_POPCNT_;  // x86 population count
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index a062c12..9e154c6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,9 +40,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 67U);
+  EXPECT_EQ(x86_features->AsBitmap(), 3U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,9 +60,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 207U);
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
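(The new bitmap expectations fall straight out of the enum change above: the lock_add bit (64) is gone and popcnt moves from 128 down to 64. Assuming the low bits keep their usual values — smp = 1, ssse3 = 2, sse4.1 = 4, sse4.2 = 8, consistent with the pre-existing smp-only expectation of 1U — the arithmetic is:)

// Illustrative check of the updated AsBitmap() expectations (low-bit values assumed as noted above).
static_assert(1 + 2 + 64 == 67 && 1 + 2 == 3,
              "old smp|ssse3|lock_add vs. new smp|ssse3");
static_assert(1 + 2 + 4 + 8 + 64 + 128 == 207 && 1 + 2 + 4 + 8 + 64 == 79,
              "lock_add bit dropped and popcnt renumbered from 128 to 64");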
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 3f2abfe..d685ace 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -22,9 +22,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO2(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -35,22 +35,22 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
@@ -61,28 +61,28 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
    // Push save refs only callee-save method.
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      * and preserves the value of got_reg at entry.
      */
-MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
     PUSH esi
     PUSH ebp
-    pushl REG_VAR(got_reg)  // Save got_reg
+    PUSH RAW_VAR(got_reg)  // Save got_reg
     subl MACRO_LITERAL(8), %esp  // Grow stack by 2 words.
     CFI_ADJUST_CFA_OFFSET(8)
 
@@ -91,21 +91,22 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
    // Push save refs only callee-save method.
-    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
     // Restore got_reg.
     movl 12(%esp), REG_VAR(got_reg)
+    CFI_RESTORE(RAW_VAR(got_reg))
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 3*4 + 16 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86) size not as expected."
 #endif
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to saved values
     CFI_ADJUST_CFA_OFFSET(-16)
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
@@ -115,9 +116,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg)
     PUSH edi  // Save callee saves
     PUSH esi
     PUSH ebp
@@ -138,23 +139,23 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
    // Push save refs and args callee-save method.
-    pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    pushl RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
    // Store esp as the top quick frame.
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +4: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4*8 + 4)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 7*4 + 4*8 + 4)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86) size not as expected."
 #endif
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX.
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs) where the method is passed in EAX.
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH edi  // Save callee saves
     PUSH esi
@@ -178,7 +179,7 @@
     movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs. EAX is still on the stack.
     movsd 4(%esp), %xmm0
     movsd 12(%esp), %xmm1
@@ -199,7 +200,7 @@
 // Restore register and jump to routine
 // Inputs:  EDI contains pointer to code.
 // Notes: Need to pop EAX too (restores Method*)
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP)
     POP eax  // Restore Method*
 
     // Restore FPRs.
@@ -221,13 +222,81 @@
 END_MACRO
 
     /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    // Save core registers.
+    PUSH edi
+    PUSH esi
+    PUSH ebp
+    PUSH ebx
+    PUSH edx
+    PUSH ecx
+    PUSH eax
+    // Create space for FPR registers and stack alignment padding.
+    subl MACRO_LITERAL(12 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(12 + 8 * 8)
+    // Save FPRs.
+    movsd %xmm0, 12(%esp)
+    movsd %xmm1, 20(%esp)
+    movsd %xmm2, 28(%esp)
+    movsd %xmm3, 36(%esp)
+    movsd %xmm4, 44(%esp)
+    movsd %xmm5, 52(%esp)
+    movsd %xmm6, 60(%esp)
+    movsd %xmm7, 68(%esp)
+
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+    // Load Runtime::instance_ from GOT.
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+    // Push save everything callee-save method.
+    pushl RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(REG_VAR(temp_reg))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 7*4 + 8*8 + 12 + 4 + 4)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86) size not as expected."
+#endif
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding are still on the stack.
+    movsd 16(%esp), %xmm0
+    movsd 24(%esp), %xmm1
+    movsd 32(%esp), %xmm2
+    movsd 40(%esp), %xmm3
+    movsd 48(%esp), %xmm4
+    movsd 56(%esp), %xmm5
+    movsd 64(%esp), %xmm6
+    movsd 72(%esp), %xmm7
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addl MACRO_LITERAL(16 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+    // Restore core registers.
+    POP eax
+    POP ecx
+    POP edx
+    POP ebx
+    POP ebp
+    POP esi
+    POP edi
+END_MACRO
+
+    /*
     * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp              // Alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -237,23 +306,23 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
+    call CALLVAR(cxx_name)                     // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp               // alignment padding
+    subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -265,7 +334,7 @@
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                                   // alignment padding
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
@@ -283,6 +352,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -310,6 +384,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
      * the method_idx.  This wrapper will save arg1-arg3 and call the appropriate C helper.
@@ -325,7 +405,7 @@
      * pointing back to the original caller.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edx  // remember SP
 
     // Outgoing argument set up
@@ -649,25 +729,9 @@
     ret
 END_FUNCTION art_quick_invoke_static_stub
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
-    // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp                // push padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl %fs:THREAD_SELF_OFFSET                // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    call CALLVAR(cxx_name)                      // cxx_name(Thread*)
-    addl MACRO_LITERAL(16), %esp                // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME         // restore frame up to return address
-    CALL_MACRO(return_macro)                    // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                  // push padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -677,14 +741,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     PUSH eax                                     // push padding
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
@@ -694,14 +758,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx         // save ref containing registers for GC
     // Outgoing argument set up
     pushl %fs:THREAD_SELF_OFFSET                 // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -711,14 +775,14 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, Thread*)
     addl MACRO_LITERAL(16), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME_PRESERVE_GOT_REG ebx, ebx  // save ref containing registers for GC
 
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                 // alignment padding
@@ -732,16 +796,16 @@
     call CALLVAR(cxx_name)                       // cxx_name(arg1, arg2, arg3, arg4, Thread*)
     addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx       // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx         // get referrer
     PUSH eax                                          // push padding
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -750,16 +814,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %edx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %edx         // get referrer
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                                          // pass referrer
@@ -768,16 +832,16 @@
     call CALLVAR(cxx_name)                            // cxx_name(arg1, arg2, referrer, Thread*)
     addl MACRO_LITERAL(16), %esp                      // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx        // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx               // save ref containing registers for GC
     // Outgoing argument set up
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ebx         // get referrer
     subl MACRO_LITERAL(12), %esp                      // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
@@ -790,7 +854,7 @@
                                                       //          Thread*)
     addl LITERAL(32), %esp                            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME               // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                      // restore frame up to return address
     CALL_MACRO(return_macro)                          // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -906,7 +970,7 @@
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
     POP edi
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx          // save ref containing registers for GC
     // Outgoing argument set up
     PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -914,16 +978,174 @@
     PUSH ecx
     PUSH eax
     call SYMBOL(artAllocObjectFromCodeRosAlloc)  // cxx_name(arg0, arg1, Thread*)
-    addl LITERAL(16), %esp        // pop arguments
+    addl LITERAL(16), %esp                       // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // resotre frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class.
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                    // Check null class
+    jz   VAR(slowPathLabel)
+                                                        // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
+    jne  VAR(slowPathLabel)
+                                                        // No fake dependence needed on x86
+                                                        // between status and flags load,
+                                                        // since each load is a load-acquire,
+                                                        // so loads are not reordered.
+                                                        // Check access flags has
+                                                        // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
+    jnz  VAR(slowPathLabel)
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+    movl THREAD_LOCAL_END_OFFSET(%ebx), %edi            // Load thread_local_end.
+    subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi            // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi    // Load the object size.
+    cmpl %edi, %esi                                     // Check if it fits. OK to do this
+                                                        // before rounding up the object size
+                                                        // assuming the buf size alignment.
+    ja   VAR(slowPathLabel)
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi           // Align the size by 8. (addr + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
+    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax            // Load thread_local_pos
+                                                        // as allocated object.
+    addl %eax, %esi                                     // Add the object size.
+    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
+    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)  // Increase thread_local_objects.
+                                                        // Store the class pointer in the header.
+                                                        // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
+    POP edi
+    POP esi
+    ret                                                 // Fast path succeeded.
+END_MACRO
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    POP edi
+    POP esi
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx                 // save ref containing registers for GC
+    // Outgoing argument set up
+    PUSH eax                                            // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET                        // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
+    PUSH eax
+    call CALLVAR(cxx_name)                              // cxx_name(arg0, arg1, Thread*)
+    addl LITERAL(16), %esp
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
+END_FUNCTION art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+                                                            // Read barrier for class load.
+    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH eax
+    PUSH ecx
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                             // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH edx                                                // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                         // cxx_name(mirror::Object* obj)
+    movl %eax, %edx
+    addl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP ecx
+    POP eax
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
+
+DEFINE_FUNCTION art_quick_resolve_string
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx
+    movl FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx                   // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx           // get declaring class
+    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx    // get string dex cache
+    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
+    andl %eax, %edx
+    shl LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %edx
+    addl %ecx, %edx
+    movlps (%edx), %xmm0                                     // load string idx and pointer to xmm0
+    movd %xmm0, %ecx                                         // extract pointer
+    pshufd LITERAL(0x55), %xmm0, %xmm0                       // shuffle index into lowest bits
+    movd %xmm0, %edx                                         // extract index
+    cmp %edx, %eax
+    jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jz .Lart_quick_resolve_string_slow_path
+#endif
+    movl %ecx, %eax
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    ret
+
+.Lart_quick_resolve_string_slow_path:
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                                        // push padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                                     // pass arg1
+    call SYMBOL(artResolveStringFromCode)
+    addl LITERAL(16), %esp                                       // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END_FUNCTION art_quick_resolve_string
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -938,7 +1160,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // test the 2 high bits.
     jne  .Lslow_lock                      // slow path if either of the two high bits are set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // lock word contains a thin lock
     // unlocked case - edx: original lock word, eax: obj.
@@ -954,9 +1176,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count for overflow check.
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set.
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // overflowed if the first gc state bit is set.
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %eax, %ecx                       // save obj to use eax for cmpxchg.
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
@@ -969,7 +1191,7 @@
     movl  %ecx, %eax                      // restore eax
     jmp  .Lretry_lock
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -979,12 +1201,12 @@
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
 DEFINE_FUNCTION art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -994,7 +1216,7 @@
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
@@ -1010,13 +1232,13 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %eax, %edx                       // edx: obj
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original rb bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
 #else
@@ -1040,7 +1262,7 @@
     movl %edx, %eax                       // restore eax
     jmp  .Lretry_unlock
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1050,12 +1272,12 @@
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     // Outgoing argument set up
     subl LITERAL(8), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1065,7 +1287,7 @@
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
     addl LITERAL(16), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
@@ -1096,7 +1318,7 @@
     POP ecx
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  ebx, ebx  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                              // alignment padding
     pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
@@ -1248,7 +1470,7 @@
     POP  edx
     POP  ecx
     POP  eax
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1270,7 +1492,19 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx              // save everything for GC
+    // Outgoing argument set up
+    subl MACRO_LITERAL(12), %esp                      // push padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET                      // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artTestSuspendFromCode)               // (Thread*)
+    addl MACRO_LITERAL(16), %esp                      // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_EVERYTHING_FRAME                     // restore frame up to return address
+    ret                                               // return
+END_FUNCTION art_quick_test_suspend
 
 DEFINE_FUNCTION art_quick_d2l
     subl LITERAL(12), %esp        // alignment padding, room for argument
@@ -1395,14 +1629,14 @@
 // Call artSet64InstanceFromCode with 4 word size arguments and the referrer.
 DEFINE_FUNCTION art_quick_set64_instance
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
     // Outgoing argument set up
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    pushl (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE+12)(%esp)  // pass referrer
+    pushl (FRAME_SIZE_SAVE_REFS_ONLY+12)(%esp)  // pass referrer
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                      // pass high half of new_val
     PUSH edx                      // pass low half of new_val
@@ -1411,7 +1645,7 @@
     call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_instance
 
@@ -1420,9 +1654,9 @@
 DEFINE_FUNCTION art_quick_set64_static
     // TODO: Implement SETUP_GOT_NOSAVE for got_reg = ecx to avoid moving around the registers.
     movd %ebx, %xmm0
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx  // save ref containing registers for GC
     movd %xmm0, %ebx
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
+    mov FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx  // get referrer
     subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1434,12 +1668,12 @@
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
     addl LITERAL(32), %esp        // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1449,9 +1683,9 @@
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-    addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp
-    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE))
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    addl LITERAL(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY))
+    RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
@@ -1496,7 +1730,7 @@
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, ebx
     movl %esp, %edi
     PUSH EDI                      // pass SP. do not just PUSH ESP; that messes up unwinding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
@@ -1509,14 +1743,14 @@
     CFI_ADJUST_CFA_OFFSET(-16)
     test %eax, %eax               // if code pointer is null goto deliver pending exception
     jz 1f
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME_AND_JUMP
 1:
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_EAX
     movl %esp, %ebp               // save SP at callee-save frame
     CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
@@ -1595,7 +1829,7 @@
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  ebx, ebx  // save frame
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  ebx, ebx  // save frame
     mov %esp, %edx                // remember SP
     PUSH eax                      // alignment padding
     PUSH edx                      // pass SP
@@ -1625,11 +1859,11 @@
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, edx
+    SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-4+16(%esp)  // Pass LR.
+    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
     CFI_ADJUST_CFA_OFFSET(4)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
@@ -1664,7 +1898,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushl LITERAL(0)              // Push a fake return PC as there will be none on the stack.
     CFI_ADJUST_CFA_OFFSET(4)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
     CFI_ADJUST_CFA_OFFSET(8)
@@ -1690,7 +1924,7 @@
     movq (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
     CFI_ADJUST_CFA_OFFSET(-8)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
@@ -1702,7 +1936,7 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize
     PUSH ebx                      // Entry point for a jump. Fake that we were called.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
@@ -1716,7 +1950,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
     subl LITERAL(12), %esp                      // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
@@ -1781,13 +2015,90 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
-DEFINE_FUNCTION art_quick_read_barrier_mark
-    PUSH eax                         // pass arg1 - obj
-    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
-    addl LITERAL(4), %esp            // pop argument
-    CFI_ADJUST_CFA_OFFSET(-4)
+// Create a function `name` calling the ReadBarrier::Mark routine,
+// getting its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
+//   (instead of EAX);
+// - register `reg` is used to return the result of this function
+//   (instead of EAX);
+// - EAX is treated like a normal (non-argument) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    test REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
     ret
-END_FUNCTION art_quick_read_barrier_mark
+.Lslow_rb_\name:
+    // Save all potentially live caller-save core registers.
+    PUSH eax
+    PUSH ecx
+    PUSH edx
+    PUSH ebx
+    // 8-byte align the stack to improve (8-byte) XMM register saving and restoring,
+    // and create space for caller-save floating-point registers.
+    subl MACRO_LITERAL(4 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movsd %xmm0, 0(%esp)
+    movsd %xmm1, 8(%esp)
+    movsd %xmm2, 16(%esp)
+    movsd %xmm3, 24(%esp)
+    movsd %xmm4, 32(%esp)
+    movsd %xmm5, 40(%esp)
+    movsd %xmm6, 48(%esp)
+    movsd %xmm7, 56(%esp)
+
+    subl LITERAL(4), %esp            // alignment padding
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH RAW_VAR(reg)                // pass arg1 - obj from `reg`
+    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
+    .ifnc RAW_VAR(reg), eax
+      movl %eax, REG_VAR(reg)        // return result into `reg`
+    .endif
+    addl LITERAL(8), %esp            // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-8)
+
+    // Restore floating-point registers.
+    movsd 0(%esp), %xmm0
+    movsd 8(%esp), %xmm1
+    movsd 16(%esp), %xmm2
+    movsd 24(%esp), %xmm3
+    movsd 32(%esp), %xmm4
+    movsd 40(%esp), %xmm5
+    movsd 48(%esp), %xmm6
+    movsd 56(%esp), %xmm7
+    // Remove floating-point registers and padding.
+    addl MACRO_LITERAL(8 * 8 + 4), %esp
+    CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE ebx, RAW_VAR(reg)
+    POP_REG_NE edx, RAW_VAR(reg)
+    POP_REG_NE ecx, RAW_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+.Lret_rb_\name:
+    ret
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (ESP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, ebp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, esi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, edi
 
 DEFINE_FUNCTION art_quick_read_barrier_slow
     PUSH edx                         // pass arg3 - offset
@@ -1800,10 +2111,12 @@
 END_FUNCTION art_quick_read_barrier_slow
 
 DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    subl LITERAL(8), %esp                   // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH eax                                // pass arg1 - root
     call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
-    addl LITERAL(4), %esp                   // pop argument
-    CFI_ADJUST_CFA_OFFSET(-4)
+    addl LITERAL(12), %esp                  // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
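
For reference, the dex-cache lookup performed by the new art_quick_resolve_string fast path above corresponds roughly to the C++ sketch below. This is illustrative only: the struct, field names, and cache size are simplified stand-ins rather than runtime declarations, and the USE_READ_BARRIER mark-bit check is omitted.

#include <cstdint>

// Simplified stand-in for one 8-byte dex-cache slot, matching the pair that
// movlps loads into xmm0: a 32-bit string reference followed by the 32-bit
// dex string index cached with it.
struct StringDexCachePair {
  uint32_t string_ref;  // cached java.lang.String reference (0 when empty)
  uint32_t string_idx;  // dex string index stored alongside the reference
};

// Assumed power-of-two cache size, so masking with size - 1 mirrors the
// STRING_DEX_CACHE_SIZE_MINUS_ONE masking done in the assembly.
constexpr uint32_t kStringDexCacheSize = 1024;

uint32_t ResolveStringFastPath(const StringDexCachePair* cache, uint32_t string_idx) {
  const StringDexCachePair slot = cache[string_idx & (kStringDexCacheSize - 1u)];
  if (slot.string_idx == string_idx) {
    return slot.string_ref;  // hit: return without setting up a runtime frame
  }
  return 0u;  // miss: the assembly falls back to artResolveStringFromCode
}

On a miss, the stub sets up the save-refs-only frame and calls artResolveStringFromCode, as shown in the slow path above.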
 
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index ed1d860..9fcde35 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -36,27 +36,39 @@
   XMM7 = 7,
 };
 
+static constexpr uint32_t kX86CalleeSaveAlwaysSpills =
+    (1 << art::x86::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86CalleeSaveRefSpills =
     (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
 static constexpr uint32_t kX86CalleeSaveArgSpills =
     (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+static constexpr uint32_t kX86CalleeSaveEverythingSpills =
+    (1 << art::x86::EAX) | (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+
 static constexpr uint32_t kX86CalleeSaveFpArgSpills =
     (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
     (1 << art::x86::XMM2) | (1 << art::x86::XMM3);
+static constexpr uint32_t kX86CalleeSaveFpEverythingSpills =
+    (1 << art::x86::XMM0) | (1 << art::x86::XMM1) |
+    (1 << art::x86::XMM2) | (1 << art::x86::XMM3) |
+    (1 << art::x86::XMM4) | (1 << art::x86::XMM5) |
+    (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
 
 constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
-      (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+  return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-    return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0;
+    return (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+        (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
                   2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kX86PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kX86PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
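
As a quick sanity check of the kSaveEverything case added above (hand arithmetic, assuming the usual 4-byte kX86PointerSize and 16-byte kStackAlignment for x86):

// Core spills: fake return address + {EBP, ESI, EDI} + {EAX, ECX, EDX, EBX} = 8 slots.
// FP spills:   XMM0..XMM7 = 8 registers, each counted as two 4-byte slots.
// Frame size:  RoundUp((8 + 2 * 8 + 1 /* Method* */) * 4, 16) = RoundUp(100, 16) = 112 bytes.
static_assert(((8 + 2 * 8 + 1) * 4 + 15) / 16 * 16 == 112,
              "x86 kSaveEverything frame expected to be 112 bytes");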
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index c39d122..241650e 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -20,6 +20,7 @@
 #include <sys/types.h>
 
 #include "asm_support_x86.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "thread-inl.h"
 #include "thread_list.h"
@@ -136,7 +137,7 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_SELF_OFFSET, SelfOffset<PointerSize::k32>().Int32Value());
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
       : "r"(THREAD_SELF_OFFSET)  // input
@@ -144,9 +145,9 @@
   CHECK_EQ(self_check, this);
 
   // Sanity check other offsets.
-  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<4>().Int32Value());
-  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<4>().Int32Value());
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value());
+  CHECK_EQ(THREAD_ID_OFFSET, ThinLockIdOffset<PointerSize::k32>().Int32Value());
 }
 
 void Thread::CleanupCpu() {
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index cf0039c..0728f99 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -31,7 +31,8 @@
     // Clang/llvm does not support .altmacro. However, the clang/llvm preprocessor doesn't
     // separate the backslash and parameter by a space. Everything just works.
     #define RAW_VAR(name) \name
-    #define VAR(name) SYMBOL(\name)
+    #define VAR(name) \name
+    #define CALLVAR(name) SYMBOL(\name)
     #define PLT_VAR(name) \name@PLT
     #define REG_VAR(name) %\name
     #define CALL_MACRO(name) \name
@@ -45,6 +46,7 @@
     .altmacro
     #define RAW_VAR(name) name&
     #define VAR(name) name&
+    #define CALLVAR(name) SYMBOL(name&)
     #define PLT_VAR(name) name&@PLT
     #define REG_VAR(name) %name
     #define CALL_MACRO(name) name&
@@ -52,7 +54,7 @@
 
 #define LITERAL(value) $value
 #if defined(__APPLE__)
-    #define MACRO_LITERAL(value) $$(value)
+    #define MACRO_LITERAL(value) $(value)
 #else
     #define MACRO_LITERAL(value) $value
 #endif
@@ -110,10 +112,10 @@
 // for mac builds.
 MACRO1(DEFINE_FUNCTION, c_name)
     FUNCTION_TYPE(SYMBOL(\c_name))
-    ASM_HIDDEN SYMBOL(\c_name)
-    .globl VAR(c_name)
+    ASM_HIDDEN CALLVAR(c_name)
+    .globl CALLVAR(c_name)
     ALIGN_FUNCTION_ENTRY
-VAR(c_name):
+CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index eddd172..a4446d3 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,8 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_ONLY (64 + 4*8)
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS (112 + 12*8)
+#define FRAME_SIZE_SAVE_EVERYTHING (144 + 16*8)
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
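
A hand cross-check (not part of the patch itself): the two new totals agree with the preprocessor size checks that appear in quick_entrypoints_x86_64.S further down in this diff.

// kSaveRefsAndArgs: 11 * 8 bytes of pushed GPRs + 12 XMMs + 16 bytes for the two
// ArtMethod* slots + 8 bytes of implicit return address = 208 = 112 + 12 * 8.
static_assert(112 + 12 * 8 == 11 * 8 + 12 * 8 + 16 + 8, "kSaveRefsAndArgs frame size");
// kSaveEverything: 15 * 8 bytes of pushed GPRs + 16 XMMs + ArtMethod* + alignment
// padding + 8 bytes of implicit return address = 272 = 144 + 16 * 8.
static_assert(144 + 16 * 8 == 15 * 8 + 16 * 8 + 16 + 8, "kSaveEverything frame size");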
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index bd6df70..42b9699 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -28,11 +28,29 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                   const mirror::Class* ref_class);
+extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
+                                                 const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -82,7 +100,37 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  // x86-64 has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index aba7234..0840f89 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,10 +74,9 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
-                               bool has_POPCNT)
+                               bool has_AVX, bool has_AVX2, bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add, has_POPCNT) {
+                                  has_AVX2, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 78aeacf..f2b2cd8 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 562ee2d..647fe1d 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -42,9 +42,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
      */
-MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -68,7 +68,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
-    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -76,17 +76,17 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
      */
-MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -110,7 +110,7 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
-    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
     // Store rsp as the top quick frame.
@@ -118,13 +118,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
     movq 8(%rsp), %xmm12
     movq 16(%rsp), %xmm13
     movq 24(%rsp), %xmm14
@@ -142,9 +142,9 @@
 
     /*
      * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
      */
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
 #if defined(__APPLE__)
     int3
     int3
@@ -165,10 +165,10 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots for ArtMethod*.
-    subq MACRO_LITERAL(80 + 4 * 8), %rsp
-    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
+    subq MACRO_LITERAL(16 + 12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
-    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
+    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
     movq %xmm1, 24(%rsp)
@@ -189,13 +189,13 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
     // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO
 
-MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
+MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH r15  // Callee save.
     PUSH r14  // Callee save.
@@ -230,7 +230,7 @@
     movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
 END_MACRO
 
-MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
     // Restore FPRs.
     movq 16(%rsp), %xmm0
     movq 24(%rsp), %xmm1
@@ -260,13 +260,115 @@
     POP r15
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    PUSH r15
+    PUSH r14
+    PUSH r13
+    PUSH r12
+    PUSH r11
+    PUSH r10
+    PUSH r9
+    PUSH r8
+    PUSH rdi
+    PUSH rsi
+    PUSH rbp
+    PUSH rbx
+    PUSH rdx
+    PUSH rcx
+    PUSH rax
+    // Create space for FPRs and stack alignment padding.
+    subq MACRO_LITERAL(8 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
+    // R10 := Runtime::Current()
+    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
+    movq (%r10), %r10
+    // Save FPRs.
+    movq %xmm0, 8(%rsp)
+    movq %xmm1, 16(%rsp)
+    movq %xmm2, 24(%rsp)
+    movq %xmm3, 32(%rsp)
+    movq %xmm4, 40(%rsp)
+    movq %xmm5, 48(%rsp)
+    movq %xmm6, 56(%rsp)
+    movq %xmm7, 64(%rsp)
+    movq %xmm8, 72(%rsp)
+    movq %xmm9, 80(%rsp)
+    movq %xmm10, 88(%rsp)
+    movq %xmm11, 96(%rsp)
+    movq %xmm12, 104(%rsp)
+    movq %xmm13, 112(%rsp)
+    movq %xmm14, 120(%rsp)
+    movq %xmm15, 128(%rsp)
+    // Push ArtMethod* for save everything frame method.
+    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
+    CFI_ADJUST_CFA_OFFSET(8)
+    // Store rsp as the top quick frame.
+    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +8: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
+#endif
+#endif  // __APPLE__
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    // Restore FPRs. Method and padding are still on the stack.
+    movq 16(%rsp), %xmm0
+    movq 24(%rsp), %xmm1
+    movq 32(%rsp), %xmm2
+    movq 40(%rsp), %xmm3
+    movq 48(%rsp), %xmm4
+    movq 56(%rsp), %xmm5
+    movq 64(%rsp), %xmm6
+    movq 72(%rsp), %xmm7
+    movq 80(%rsp), %xmm8
+    movq 88(%rsp), %xmm9
+    movq 96(%rsp), %xmm10
+    movq 104(%rsp), %xmm11
+    movq 112(%rsp), %xmm12
+    movq 120(%rsp), %xmm13
+    movq 128(%rsp), %xmm14
+    movq 136(%rsp), %xmm15
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addq MACRO_LITERAL(16 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+    // Restore core registers (in reverse order of the spills above).
+    POP rax
+    POP rcx
+    POP rdx
+    POP rbx
+    POP rbp
+    POP rsi
+    POP rdi
+    POP r8
+    POP r9
+    POP r10
+    POP r11
+    POP r12
+    POP r13
+    POP r14
+    POP r15
+END_MACRO
+
 
     /*
     * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
      * exception is Thread::Current()->exception_.
      */
 MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
     // (Thread*) setup
     movq %gs:THREAD_SELF_OFFSET, %rdi
     call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
@@ -275,30 +377,30 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(arg1, Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call VAR(cxx_name)                 // cxx_name(Thread*)
+    call CALLVAR(cxx_name)             // cxx_name(Thread*)
     UNREACHABLE
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -309,6 +411,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -336,6 +443,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
      * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
@@ -353,18 +466,18 @@
      * Adapted from x86 code.
      */
 MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
     movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
     movq %rsp, %rcx                                        // pass SP
 
-    call VAR(cxx_name)                                     // cxx_name(arg1, arg2, Thread*, SP)
+    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                            // save the code pointer
     movq %rax, %rdi
     movq %rdx, %rax
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     testq %rdi, %rdi
     jz 1f
@@ -691,57 +804,46 @@
 #endif  // __APPLE__
 END_FUNCTION art_quick_do_long_jump
 
-MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    CALL_MACRO(return_macro)             // return or deliver exception
-    END_FUNCTION VAR(c_name)
-END_MACRO
-
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
-    call VAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     CALL_MACRO(return_macro)             // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
     // Outgoing argument set up
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg1, arg2, arg3, arg4, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -749,11 +851,11 @@
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rsi                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -761,11 +863,11 @@
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rdx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
-    call VAR(cxx_name)                  // (arg0, arg1, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // (arg0, arg1, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -773,11 +875,11 @@
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
     movq 8(%rsp), %rcx                  // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                         // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, referrer, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
     CALL_MACRO(return_macro)            // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -808,145 +910,456 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have x86_64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
     // RDI: type_idx, RSI: ArtMethod*, RAX: return value
     // RDX, RCX, R8, R9: free.
-    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx   // Load dex cache resolved types array
-                                                              // Load the class (edx)
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+                                                             // Load the class (edx)
     movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
-    testl  %edx, %edx                                         // Check null class
+    testl  %edx, %edx                                      // Check null class
     jz     .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Check class status.
+                                                           // Check class status.
     cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
     jne    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // We don't need a fence (between the
-                                                              // the status and the access flag
-                                                              // loads) here because every load is
-                                                              // a load acquire on x86.
-                                                              // Check access flags has
-                                                              // kAccClassIsFinalizable
+                                                           // We don't need a fence (between the
+                                                           // status and the access flag
+                                                           // loads) here because every load is
+                                                           // a load acquire on x86.
+                                                           // Check access flags has
+                                                           // kAccClassIsFinalizable
     testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
     jnz    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Check if the thread local
-                                                              // allocation stack has room.
-    movq   %gs:THREAD_SELF_OFFSET, %r8                        // r8 = thread
-    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx     // rcx = alloc stack top.
+                                                           // Check if the thread local
+                                                           // allocation stack has room.
+    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
+    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
     cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
     jae    .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Load the object size
+                                                           // Load the object size
     movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
-                                                              // Check if the size is for a thread
-                                                              // local allocation
+                                                           // Check if the size is for a thread
+                                                           // local allocation
     cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
     ja     .Lart_quick_alloc_object_rosalloc_slow_path
-                                                              // Compute the rosalloc bracket index
-                                                              // from the size.
-                                                              // Align up the size by the rosalloc
-                                                              // bracket quantum size and divide
-                                                              // by the quantum size and subtract
-                                                              // by 1. This code is a shorter but
-                                                              // equivalent version.
+                                                           // Compute the rosalloc bracket index
+                                                           // from the size.
+                                                           // Align up the size by the rosalloc
+                                                           // bracket quantum size and divide
+                                                           // by the quantum size and subtract
+                                                           // by 1. This code is a shorter but
+                                                           // equivalent version.
     subq   LITERAL(1), %rax
     shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
-                                                              // Load the rosalloc run (r9)
+                                                           // Load the rosalloc run (r9)
     movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
-                                                              // Load the free list head (rax). This
-                                                              // will be the return val.
+                                                           // Load the free list head (rax). This
+                                                           // will be the return val.
     movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
     testq  %rax, %rax
     jz     .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
-                                                              // Push the new object onto the thread
-                                                              // local allocation stack and
-                                                              // increment the thread local
-                                                              // allocation stack top.
+                                                           // Push the new object onto the thread
+                                                           // local allocation stack and
+                                                           // increment the thread local
+                                                           // allocation stack top.
     movl   %eax, (%rcx)
     addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
     movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
-                                                              // Load the next pointer of the head
-                                                              // and update the list head with the
-                                                              // next pointer.
+                                                           // Load the next pointer of the head
+                                                           // and update the list head with the
+                                                           // next pointer.
     movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
     movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
-                                                              // Store the class pointer in the
-                                                              // header. This also overwrites the
-                                                              // next pointer. The offsets are
-                                                              // asserted to match.
+                                                           // Store the class pointer in the
+                                                           // header. This also overwrites the
+                                                           // next pointer. The offsets are
+                                                           // asserted to match.
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
     POISON_HEAP_REF edx
     movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-                                                              // Decrement the size of the free list
+                                                           // Decrement the size of the free list
     decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
-                                                              // No fence necessary for x86.
+                                                           // No fence necessary for x86.
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save ref containing registers for GC
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                         // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeRosAlloc)               // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                       // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)            // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
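The "shorter but equivalent version" mentioned in the comments above computes the rosalloc bracket index as (size - 1) >> quantum_shift instead of rounding the size up to the quantum, dividing, and subtracting one. A minimal C++ sketch of that equivalence, with an assumed 16-byte quantum (illustrative only; the real values come from the generated assembly constants):

    #include <cassert>
    #include <cstddef>

    // Illustrative only; the real values come from the generated asm constants.
    constexpr size_t kBracketQuantum = 16;      // assumed rosalloc bracket quantum
    constexpr size_t kBracketQuantumShift = 4;  // log2(kBracketQuantum)

    // Long form: align the size up to the quantum, divide, subtract one.
    constexpr size_t BracketIndexLong(size_t size) {
      return (size + kBracketQuantum - 1) / kBracketQuantum - 1;
    }

    // Short form used by the assembly above: subq $1, then shrq by the shift.
    constexpr size_t BracketIndexShort(size_t size) {
      return (size - 1) >> kBracketQuantumShift;
    }

    int main() {
      for (size_t size = 1; size <= 512; ++size) {
        assert(BracketIndexLong(size) == BracketIndexShort(size));
      }
      return 0;
    }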
 
-// A handle-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                       // Check null class
+    jz   RAW_VAR(slowPathLabel)
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
+END_MACRO
+
+// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
+                                                           // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne  RAW_VAR(slowPathLabel)
+                                                           // No fake dependence needed on x86
+                                                           // between status and flags load,
+                                                           // since each load is a load-acquire,
+                                                           // so loads are not reordered.
+                                                           // Check access flags has
+                                                           // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz  RAW_VAR(slowPathLabel)
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
+END_MACRO
+
+// The fast path code for art_quick_alloc_object_initialized_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
+    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
+    leaq OBJECT_ALIGNMENT_MASK(%rax, %rcx), %rcx               // Add size to pos, note that these
+                                                               // are both 32 bit ints, overflow
+                                                               // will cause the add to be past the
+                                                               // end of the thread local region.
+                                                               // Also sneak in alignment mask add.
+    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %rcx        // Align the size by 8. (addr + 7) &
+                                                               // ~7.
+    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
+    ja   RAW_VAR(slowPathLabel)
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increase thread_local_objects.
+                                                               // Store the class pointer in the
+                                                               // header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+END_MACRO
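Expressed in C++, the initialized-class fast path above is a bump-pointer allocation out of the thread-local buffer. A hedged sketch with stand-in field names (pos/end/objects mirror the THREAD_LOCAL_* offsets used above, not the runtime's actual declarations):

    #include <cstdint>
    #include <cstring>

    constexpr uintptr_t kObjectAlignment = 8;
    constexpr uintptr_t kAlignmentMask = kObjectAlignment - 1;

    // Stand-ins for the Thread fields named by the THREAD_LOCAL_* offsets above.
    struct ThreadTlab {
      uintptr_t pos;       // THREAD_LOCAL_POS_OFFSET
      uintptr_t end;       // THREAD_LOCAL_END_OFFSET
      uintptr_t objects;   // THREAD_LOCAL_OBJECTS_OFFSET
    };

    // Returns the new object's address, or 0 to signal "take the slow path".
    inline uintptr_t AllocObjectTlabFastPath(ThreadTlab* self, uint32_t klass,
                                             uint32_t object_size) {
      uintptr_t obj = self->pos;
      // (pos + size + 7) & ~7, exactly the leaq/andq pair in the macro above.
      uintptr_t new_pos = (obj + object_size + kAlignmentMask) & ~kAlignmentMask;
      if (new_pos > self->end) {
        return 0;  // Does not fit; fall back to the C++ allocator entry point.
      }
      self->pos = new_pos;
      self->objects += 1;
      // Publish the (compressed) class reference in the object header;
      // no fence is required for this on x86.
      std::memcpy(reinterpret_cast<void*>(obj), &klass, sizeof(klass));
      return obj;
    }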
+
+// The fast path code for art_quick_alloc_array_region_tlab.
+// Inputs: RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* method
+// Temps: RCX: the class, r8, r9
+// Output: RAX: return value.
+MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel)
+    movq %rcx, %r8                                             // Save class for later
+    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx        // Load component type.
+    UNPOISON_HEAP_REF ecx
+    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
+    shrq LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx        // Get component size shift.
+    movq %rsi, %r9
+    salq %cl, %r9                                              // Calculate array count shifted.
+    // Add array header + alignment rounding.
+    addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
+    // Add 4 extra bytes if we are doing a long array.
+    addq LITERAL(1), %rcx
+    andq LITERAL(4), %rcx
+    addq %rcx, %r9
+    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+    // Mask out the unaligned part to make sure we are 8 byte aligned.
+    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
+    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
+    addq %rax, %r9
+    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
+    ja   RAW_VAR(slowPathLabel)
+    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
+                                                               // Store the class pointer in the
+                                                               // header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF r8d
+    movl %r8d, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+END_MACRO
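The array variant prepends a size computation to the same bump-pointer step: shift the element count by the component-size shift, add the array header plus alignment slack, and add 4 more bytes for 8-byte components so the data starts at the long-array offset. A sketch of just that computation, with illustrative constants standing in for the generated ones:

    #include <cstddef>

    // Illustrative stand-ins for the generated assembly constants.
    constexpr size_t kIntArrayDataOffset = 12;   // MIRROR_INT_ARRAY_DATA_OFFSET
    constexpr size_t kLongArrayDataOffset = 16;  // MIRROR_LONG_ARRAY_DATA_OFFSET
    constexpr size_t kAlignmentMask = 7;         // OBJECT_ALIGNMENT_MASK

    static_assert(kLongArrayDataOffset == kIntArrayDataOffset + 4,
                  "long array data offset must be 4 past the int array offset");

    // component_size_shift is 0/1/2/3 for 1/2/4/8-byte array components.
    inline size_t ArrayAllocationSize(size_t component_count,
                                      size_t component_size_shift) {
      size_t size = component_count << component_size_shift;
      size += kIntArrayDataOffset + kAlignmentMask;
      // (shift + 1) & 4 is 4 only for 8-byte components: exactly the extra
      // header padding of a long/double array over an int array.
      size += (component_size_shift + 1) & 4;
      return size & ~kAlignmentMask;
    }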
+
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call CALLVAR(cxx_name)                                 // cxx_name(arg0, arg1, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
+END_MACRO
+
+// The slow path code for art_quick_alloc_array_region_tlab.
+MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name)
+    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rcx                          // pass Thread::Current()
+    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, arg2, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    // TODO: Add read barrier when this function is used.
-    // Note this function can/should implement read barrier fast path only
-    // (no read barrier slow path) because this is the fast path of tlab allocation.
-    // We can fall back to the allocation slow path to do the read barrier slow path.
 #if defined(USE_READ_BARRIER)
     int3
     int3
 #endif
     // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
-    // TODO: Add read barrier when this function is used.
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
-    testl %edx, %edx                                           // Check null class
-    jz   .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check class status.
-    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
-    jne  .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check access flags has kAccClassIsFinalizable
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
-    jnz  .Lart_quick_alloc_object_tlab_slow_path
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
-    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
-    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
-    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
-    addq %rax, %rcx                                            // Add the object size.
-    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
-    ja   .Lart_quick_alloc_object_tlab_slow_path
-    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
-                                                               // Store the class pointer in the header.
-                                                               // No fence needed for x86.
-    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-    ret                                                        // Fast path succeeded.
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeTLAB)                    // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_array_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: klass, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
+    movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
+    // Null check so that we can load the lock word.
+    testl %ecx, %ecx
+    jz .Lart_quick_alloc_array_region_tlab_slow_path
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    // Outgoing argument set up
+    movq %rcx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rcx
+    POP rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_region_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_array_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq %rdi, %rcx
+    // Already resolved, no null check.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    // Outgoing argument set up
+    movq %rcx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rcx
+    POP rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
+END_FUNCTION art_quick_alloc_array_resolved_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_slow_path
+    // Test if the GC is marking.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
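These region-TLAB entry points all guard the class with the same read-barrier decision before entering the fast path: call artReadBarrierMark only while the concurrent GC is marking and the class's lock-word mark bit is clear. A rough C++ rendering of that decision, under assumed names (the struct, the flag parameter, and the mark-bit constant stand in for the THREAD_IS_GC_MARKING_OFFSET and lock-word reads done inline above):

    #include <cstdint>

    struct Object { uint32_t lock_word; };               // minimal mirror::Object stand-in
    extern "C" Object* artReadBarrierMark(Object* obj);  // runtime mark routine

    constexpr uint32_t kMarkBitShifted = 1u << 29;  // assumed LOCK_WORD_MARK_BIT_MASK_SHIFTED

    // Returns the (possibly forwarded) class to use in the allocation fast path.
    inline Object* ClassLoadReadBarrier(Object* klass, bool is_gc_marking) {
      if (klass == nullptr) {
        return nullptr;                               // caller takes the slow path
      }
      if (is_gc_marking && (klass->lock_word & kMarkBitShifted) == 0) {
        klass = artReadBarrierMark(klass);            // mark and possibly forward
      }
      return klass;
    }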
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    movq %rdi, %rdx
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
+END_FUNCTION art_quick_alloc_object_resolved_region_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
+    movq %rdi, %rdx
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
+END_FUNCTION art_quick_alloc_object_initialized_region_tlab
+
+DEFINE_FUNCTION art_quick_resolve_string
+    movq 8(%rsp), %rcx                                         // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx         // get declaring class
+    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx  // get string dex cache
+    movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx
+    andq %rdi, %rdx
+    shlq LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %rdx
+    addq %rcx, %rdx
+    movq %rax, %rcx
+    movq (%rdx), %rdx
+    movq %rdx, %rax
+    movl %eax, %eax
+    shrq LITERAL(32), %rdx
+    cmp %rdx, %rdi
+    jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
+    jz .Lart_quick_resolve_string_slow_path
+#endif
+    ret
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME
+    movq %rcx, %rax
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
+    call SYMBOL(artResolveStringFromCode)       // artResolveStringFromCode(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END_FUNCTION art_quick_resolve_string
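The fast path of art_quick_resolve_string above indexes a fixed-size, power-of-two string dex cache whose 8-byte slots pack a 32-bit compressed String reference in the low word and the string index it caches in the high word. A hedged C++ sketch of the lookup (the cache size here is an assumed placeholder for STRING_DEX_CACHE_SIZE_MINUS_ONE):

    #include <cstdint>

    // Assumed placeholder; the real value is STRING_DEX_CACHE_SIZE_MINUS_ONE.
    constexpr uint32_t kStringDexCacheSizeMinusOne = 1024 - 1;

    // Each 8-byte slot packs a 32-bit compressed String reference (low word)
    // and the 32-bit string index it caches (high word).
    struct StringDexCacheSlot {
      uint64_t packed;
    };

    // Returns the cached compressed reference, or 0 on a cache miss
    // (a miss sends the assembly fast path to artResolveStringFromCode).
    inline uint32_t LookupStringFast(const StringDexCacheSlot* cache,
                                     uint32_t string_idx) {
      uint64_t packed = cache[string_idx & kStringDexCacheSizeMinusOne].packed;
      uint32_t cached_ref = static_cast<uint32_t>(packed);        // low 32 bits
      uint32_t cached_idx = static_cast<uint32_t>(packed >> 32);  // high 32 bits
      return cached_idx == string_idx ? cached_ref : 0;
    }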
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -961,7 +1374,7 @@
     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
     jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     test %ecx, %ecx
     jnz  .Lalready_thin                   // Lock word contains a thin lock.
     // unlocked case - edx: original lock word, edi: obj.
@@ -976,9 +1389,9 @@
     cmpw %cx, %dx                         // do we hold the lock already?
     jne  .Lslow_lock
     movl %edx, %ecx                       // copy the lock word to check count overflow.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
-    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
+    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
     jne  .Lslow_lock                      // count overflowed so go slow
     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
@@ -987,18 +1400,18 @@
     jnz  .Lretry_lock                     // cmpxchg failed retry
     ret
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
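In outline, the thin-lock fast path that this hunk touches reduces to: if the lock word minus its GC bits is zero, CAS in the caller's thread id; if the caller already owns the thin lock, bump the recursion count unless that would spill into bit 28; everything else defers to artLockObjectFromCode. A compressed C++ sketch under assumed bit-layout constants (the real values come from the generated lock-word macros):

    #include <atomic>
    #include <cstdint>

    // Assumed lock-word layout constants, standing in for the generated macros.
    constexpr uint32_t kStateMask = 3u << 30;           // LOCK_WORD_STATE_MASK
    constexpr uint32_t kGcStateMaskShifted = 3u << 28;  // LOCK_WORD_GC_STATE_MASK_SHIFTED
    constexpr uint32_t kThinLockCountOne = 1u << 16;    // LOCK_WORD_THIN_LOCK_COUNT_ONE
    constexpr uint32_t kCountOverflowBit = 1u << 28;    // LOCK_WORD_READ_BARRIER_STATE_MASK

    // Returns true on success; false means "call artLockObjectFromCode".
    inline bool ThinLockFastPath(std::atomic<uint32_t>* lock_word,
                                 uint16_t thread_id) {
      uint32_t old_word = lock_word->load(std::memory_order_relaxed);
      if ((old_word & kStateMask) != 0) {
        return false;                     // fat lock, hash code, or forwarding
      }
      uint32_t no_gc_bits = old_word & ~kGcStateMaskShifted;
      if (no_gc_bits == 0) {
        // Unlocked: install our thread id while preserving the GC bits.
        return lock_word->compare_exchange_strong(old_word, old_word | thread_id);
      }
      if (static_cast<uint16_t>(old_word) != thread_id) {
        return false;                     // thin lock owned by another thread
      }
      // Recursive lock: bump the count unless it would spill into bit 28.
      if (((no_gc_bits + kThinLockCountOne) & kCountOverflowBit) != 0) {
        return false;
      }
      return lock_word->compare_exchange_strong(old_word,
                                                old_word + kThinLockCountOne);
    }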
 
 DEFINE_FUNCTION art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
@@ -1013,12 +1426,12 @@
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
     jae  .Lrecursive_thin_unlock
     // update lockword, cmpxchg necessary for read barrier bits.
     movl %ecx, %eax                       // eax: old lock word.
-    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
+    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
 #ifndef USE_READ_BARRIER
     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
 #else
@@ -1038,18 +1451,18 @@
 #endif
     ret
 .Lslow_unlock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_unlock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
@@ -1075,7 +1488,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rsi                           // Pop arguments
     POP rdi
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
     UNREACHABLE
@@ -1252,7 +1665,7 @@
     POP  rsi
     POP  rdi
 
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
 
     // Outgoing argument set up.
     movq %rdx, %rsi                         // Pass arg 2 = value.
@@ -1268,7 +1681,14 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_quick_test_suspend
+    SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
+    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
+    ret
+END_FUNCTION art_quick_test_suspend
 
 UNIMPLEMENTED art_quick_ldiv
 UNIMPLEMENTED art_quick_lmod
@@ -1308,22 +1728,22 @@
 DEFINE_FUNCTION art_quick_set64_static
                                          // new_val is already in %rdx
     movq 8(%rsp), %rsi                   // pass referrer
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
                                          // field_idx is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
     RETURN_IF_EAX_ZERO                   // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
     movq %rsp, %rcx                         // Pass SP.
     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     movq %rax, %xmm0                        // Copy return value in case of float returns.
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
@@ -1366,13 +1786,13 @@
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rdx
     movq %rsp, %rcx
     call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
     movq %rax, %r10               // Remember returned code pointer in R10.
     movq (%rsp), %rdi             // Load called method into RDI.
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
     testq %r10, %r10              // If code pointer is null goto deliver pending exception.
     jz 1f
     jmp *%r10                     // Tail call into method.
@@ -1457,7 +1877,7 @@
      * Called to do a generic JNI down-call
      */
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI
+    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
 
     movq %rsp, %rbp                 // save SP at (old) callee-save frame
     CFI_DEF_CFA_REGISTER(rbp)
@@ -1590,11 +2010,11 @@
      * RSI, RDX, RCX, R8, R9 are arguments to that method.
      */
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
-    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
-    movq %rsp, %rdx                        // RDX := sp
+    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
+    movq %rsp, %rdx                    // RDX := sp
     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
     movq %rax, %xmm0                   // Place return value also into floating point return value.
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
@@ -1607,12 +2027,12 @@
     int3
     int3
 #else
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_AND_ARGS_FRAME
 
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.
+    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
 
     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
 
@@ -1620,9 +2040,9 @@
     movq %r12, %rdi               // Reload method pointer.
 
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
-    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.
+    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
-    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
 #endif  // __APPLE__
@@ -1631,7 +2051,7 @@
 DEFINE_FUNCTION art_quick_instrumentation_exit
     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
 
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    SETUP_SAVE_REFS_ONLY_FRAME
 
     // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
     // we would need to fully restore it. As there are a good number of callee-save registers, it
@@ -1658,7 +2078,7 @@
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rax                   // Restore integer result.
 
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RESTORE_SAVE_REFS_ONLY_FRAME
 
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
@@ -1672,7 +2092,7 @@
 DEFINE_FUNCTION art_quick_deoptimize
     pushq %rsi                         // Entry point for a jump. Fake that we were called.
                                        // Use hidden arg.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                        // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
     call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
@@ -1684,7 +2104,7 @@
      * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                                 // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
     call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
@@ -1754,16 +2174,118 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
-DEFINE_FUNCTION art_quick_read_barrier_mark
-    SETUP_FP_CALLEE_SAVE_FRAME
-    subq LITERAL(8), %rsp           // Alignment padding.
-    CFI_ADJUST_CFA_OFFSET(8)
-    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
-    addq LITERAL(8), %rsp
-    CFI_ADJUST_CFA_OFFSET(-8)
-    RESTORE_FP_CALLEE_SAVE_FRAME
+// Create a function `name` calling the ReadBarrier::Mark routine,
+// getting its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+//   the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+//   the result of this function (instead of RAX);
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+//   (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+//   (non-result) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Null check so that we can load the lock word.
+    testq REG_VAR(reg), REG_VAR(reg)
+    jz .Lret_rb_\name
+    // Check the mark bit, if it is 1 return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+    jz .Lslow_rb_\name
     ret
-END_FUNCTION art_quick_read_barrier_slow
+.Lslow_rb_\name:
+    // Save all potentially live caller-save core registers.
+    PUSH rax
+    PUSH rcx
+    PUSH rdx
+    PUSH rsi
+    PUSH rdi
+    PUSH r8
+    PUSH r9
+    PUSH r10
+    PUSH r11
+    // Create space for caller-save floating-point registers.
+    subq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(12 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movq %xmm0, 0(%rsp)
+    movq %xmm1, 8(%rsp)
+    movq %xmm2, 16(%rsp)
+    movq %xmm3, 24(%rsp)
+    movq %xmm4, 32(%rsp)
+    movq %xmm5, 40(%rsp)
+    movq %xmm6, 48(%rsp)
+    movq %xmm7, 56(%rsp)
+    movq %xmm8, 64(%rsp)
+    movq %xmm9, 72(%rsp)
+    movq %xmm10, 80(%rsp)
+    movq %xmm11, 88(%rsp)
+    SETUP_FP_CALLEE_SAVE_FRAME
+
+    .ifnc RAW_VAR(reg), rdi
+      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
+    .endif
+    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
+    .ifnc RAW_VAR(reg), rax
+      movq %rax, REG_VAR(reg)       // Return result into `reg`.
+    .endif
+
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    // Restore floating-point registers.
+    movq 0(%rsp), %xmm0
+    movq 8(%rsp), %xmm1
+    movq 16(%rsp), %xmm2
+    movq 24(%rsp), %xmm3
+    movq 32(%rsp), %xmm4
+    movq 40(%rsp), %xmm5
+    movq 48(%rsp), %xmm6
+    movq 56(%rsp), %xmm7
+    movq 64(%rsp), %xmm8
+    movq 72(%rsp), %xmm9
+    movq 80(%rsp), %xmm10
+    movq 88(%rsp), %xmm11
+    // Remove floating-point registers.
+    addq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE r11, RAW_VAR(reg)
+    POP_REG_NE r10, RAW_VAR(reg)
+    POP_REG_NE r9, RAW_VAR(reg)
+    POP_REG_NE r8, RAW_VAR(reg)
+    POP_REG_NE rdi, RAW_VAR(reg)
+    POP_REG_NE rsi, RAW_VAR(reg)
+    POP_REG_NE rdx, RAW_VAR(reg)
+    POP_REG_NE rcx, RAW_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
+.Lret_rb_\name:
+    ret
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
 
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
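As a rough cross-check of the fast path that READ_BARRIER_MARK_REG encodes, here is a minimal C++ sketch of the same logic. `LoadLockWord` and `kMarkBitMaskShifted` are stand-ins invented for the example (the real stubs read MIRROR_OBJECT_LOCK_WORD_OFFSET and test LOCK_WORD_MARK_BIT_MASK_SHIFTED); only artReadBarrierMark is the actual runtime entry point.

    #include <cstdint>

    struct Obj;                                        // stands in for mirror::Object
    extern "C" Obj* artReadBarrierMark(Obj* obj);      // runtime slow-path entry point
    uint32_t LoadLockWord(Obj* obj);                   // assumed helper, not an ART API
    constexpr uint32_t kMarkBitMaskShifted = 1u << 29; // placeholder for LOCK_WORD_MARK_BIT_MASK_SHIFTED

    inline Obj* ReadBarrierMarkSketch(Obj* obj) {
      if (obj == nullptr) {
        return nullptr;                                // Null check before touching the lock word.
      }
      if ((LoadLockWord(obj) & kMarkBitMaskShifted) != 0) {
        return obj;                                    // Mark bit already set: fast-path return.
      }
      return artReadBarrierMark(obj);                  // Otherwise take the register-saving slow path.
    }
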
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 72d7e99..867522f 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -25,12 +25,19 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr uint32_t kX86_64CalleeSaveAlwaysSpills =
+    (1 << art::x86_64::kNumberOfCpuRegisters);  // Fake return address callee save.
 static constexpr uint32_t kX86_64CalleeSaveRefSpills =
     (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
     (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
 static constexpr uint32_t kX86_64CalleeSaveArgSpills =
     (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
     (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
+static constexpr uint32_t kX86_64CalleeSaveEverythingSpills =
+    (1 << art::x86_64::RAX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) |
+    (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) | (1 << art::x86_64::R8) |
+    (1 << art::x86_64::R9) | (1 << art::x86_64::R10) | (1 << art::x86_64::R11);
+
 static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
     (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
     (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
@@ -38,22 +45,30 @@
 static constexpr uint32_t kX86_64CalleeSaveFpSpills =
     (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
     (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
+static constexpr uint32_t kX86_64CalleeSaveFpEverythingSpills =
+    (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) |
+    (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) |
+    (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+    (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7) |
+    (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) |
+    (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
 
 constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kX86_64CalleeSaveRefSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
-      (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save;
+  return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kX86_64CalleeSaveFpSpills |
-      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
+      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
 }
 
 constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86_64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(X86_64CalleeSaveFpSpills(type)) /* fprs */ +
-                  1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
+                  1 /* Method* */) * static_cast<size_t>(kX86_64PointerSize), kStackAlignment);
 }
 
 constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
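To make the arithmetic in X86_64CalleeSaveFrameSize concrete, here is a small self-contained check of the kSaveEverything case, assuming kStackAlignment is 16 bytes on x86-64; the bit counts below simply tally the masks defined above.

    #include <cstddef>

    constexpr size_t kGprs = 1 /* fake return address */ + 6 /* ref spills */ + 9 /* everything spills */;
    constexpr size_t kFprs = 4 /* XMM12-XMM15 */ + 12 /* XMM0-XMM11 */;
    constexpr size_t kRaw  = (kGprs + kFprs + 1 /* ArtMethod* */) * 8;  // 264 bytes before alignment
    static_assert(((kRaw + 15) / 16) * 16 == 272, "kSaveEverything frame rounds up to 272 bytes");
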
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index d911497..a102858 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -33,9 +33,10 @@
 
 namespace art {
 
+template<ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtField::GetDeclaringClass() {
   GcRootSource gc_root_source(this);
-  mirror::Class* result = declaring_class_.Read(&gc_root_source);
+  mirror::Class* result = declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
   DCHECK(result != nullptr);
   DCHECK(result->IsLoaded() || result->IsErroneous()) << result->GetStatus();
   return result;
@@ -122,21 +123,21 @@
 
 #define FIELD_GET(object, type) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    return object->GetField ## type ## Volatile(GetOffset()); \
+    return (object)->GetField ## type ## Volatile(GetOffset()); \
   } \
-  return object->GetField ## type(GetOffset());
+  return (object)->GetField ## type(GetOffset());
 
 #define FIELD_SET(object, type, value) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    object->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
   } else { \
-    object->SetField ## type<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type<kTransactionActive>(GetOffset(), value); \
   }
 
 inline uint8_t ArtField::GetBoolean(mirror::Object* object) {
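The extra parentheses added around `object` above matter whenever the macro argument is an expression built from low-precedence operators. A toy, self-contained illustration of the difference (not ART code):

    #include <iostream>

    #define BARE_EQ(object, rhs)  (object == (rhs))     // argument used bare, as before
    #define PAREN_EQ(object, rhs) ((object) == (rhs))   // argument parenthesized, as above

    int main() {
      int a = 5, b = 2;
      bool cond = true;
      // The bare form parses as `cond ? a : (b == 2)` and prints 5;
      // the parenthesized form parses as `(cond ? a : b) == 2` and prints 0.
      std::cout << BARE_EQ(cond ? a : b, 2) << " vs " << PAREN_EQ(cond ? a : b, 2) << "\n";
      return 0;
    }
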
diff --git a/runtime/art_field.h b/runtime/art_field.h
index b64b70f..aaccbf3 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -41,6 +41,7 @@
  public:
   ArtField();
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetDeclaringClass(mirror::Class *new_declaring_class)
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 7647ad6..1659f33 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -90,10 +90,12 @@
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
-      ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() ||
-            GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
-            GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      if (self->IsThreadSuspensionAllowable()) {
+        ScopedObjectAccess soa(self);
+        CHECK(IsRuntimeMethod() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      }
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
@@ -120,20 +122,25 @@
   return dex_method_index_;
 }
 
-inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
+inline uint32_t ArtMethod::GetImtIndex() {
+  return GetDexMethodIndex() % ImTable::kSize;
+}
+
+inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(PointerSize pointer_size) {
   return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
                                        pointer_size);
 }
 
-inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index, size_t ptr_size) {
+inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index,
+                                                       PointerSize pointer_size) {
   // NOTE: Unchecked, i.e. not throwing AIOOB. We don't even know the length here
   // without accessing the DexCache and we don't want to do that in release build.
   DCHECK_LT(method_index,
-            GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()
+            GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()
                 ->GetDexCache()->NumResolvedMethods());
-  ArtMethod* method = mirror::DexCache::GetElementPtrSize(GetDexCacheResolvedMethods(ptr_size),
+  ArtMethod* method = mirror::DexCache::GetElementPtrSize(GetDexCacheResolvedMethods(pointer_size),
                                                           method_index,
-                                                          ptr_size);
+                                                          pointer_size);
   if (LIKELY(method != nullptr)) {
     auto* declaring_class = method->GetDeclaringClass();
     if (LIKELY(declaring_class == nullptr || !declaring_class->IsErroneous())) {
@@ -143,70 +150,72 @@
   return nullptr;
 }
 
-inline void ArtMethod::SetDexCacheResolvedMethod(uint16_t method_index, ArtMethod* new_method,
-                                                 size_t ptr_size) {
+inline void ArtMethod::SetDexCacheResolvedMethod(uint16_t method_index,
+                                                 ArtMethod* new_method,
+                                                 PointerSize pointer_size) {
   // NOTE: Unchecked, i.e. not throwing AIOOB. We don't even know the length here
   // without accessing the DexCache and we don't want to do that in release build.
   DCHECK_LT(method_index,
-            GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()
+            GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()
                 ->GetDexCache()->NumResolvedMethods());
   DCHECK(new_method == nullptr || new_method->GetDeclaringClass() != nullptr);
-  mirror::DexCache::SetElementPtrSize(GetDexCacheResolvedMethods(ptr_size),
+  mirror::DexCache::SetElementPtrSize(GetDexCacheResolvedMethods(pointer_size),
                                       method_index,
                                       new_method,
-                                      ptr_size);
+                                      pointer_size);
 }
 
-inline bool ArtMethod::HasDexCacheResolvedMethods(size_t pointer_size) {
+inline bool ArtMethod::HasDexCacheResolvedMethods(PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) != nullptr;
 }
 
 inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod** other_cache,
-                                                      size_t pointer_size) {
+                                                      PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) == other_cache;
 }
 
-inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other, size_t pointer_size) {
+inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other, PointerSize pointer_size) {
   return GetDexCacheResolvedMethods(pointer_size) ==
       other->GetDexCacheResolvedMethods(pointer_size);
 }
 
-inline GcRoot<mirror::Class>* ArtMethod::GetDexCacheResolvedTypes(size_t pointer_size) {
+inline GcRoot<mirror::Class>* ArtMethod::GetDexCacheResolvedTypes(PointerSize pointer_size) {
   return GetNativePointer<GcRoot<mirror::Class>*>(DexCacheResolvedTypesOffset(pointer_size),
                                                   pointer_size);
 }
 
 template <bool kWithCheck>
-inline mirror::Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index, size_t ptr_size) {
+inline mirror::Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index,
+                                                         PointerSize pointer_size) {
   if (kWithCheck) {
     mirror::DexCache* dex_cache =
-        GetInterfaceMethodIfProxy(ptr_size)->GetDeclaringClass()->GetDexCache();
+        GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()->GetDexCache();
     if (UNLIKELY(type_index >= dex_cache->NumResolvedTypes())) {
       ThrowArrayIndexOutOfBoundsException(type_index, dex_cache->NumResolvedTypes());
       return nullptr;
     }
   }
-  mirror::Class* klass = GetDexCacheResolvedTypes(ptr_size)[type_index].Read();
+  mirror::Class* klass = GetDexCacheResolvedTypes(pointer_size)[type_index].Read();
   return (klass != nullptr && !klass->IsErroneous()) ? klass : nullptr;
 }
 
-inline bool ArtMethod::HasDexCacheResolvedTypes(size_t pointer_size) {
+inline bool ArtMethod::HasDexCacheResolvedTypes(PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) != nullptr;
 }
 
 inline bool ArtMethod::HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache,
-                                                    size_t pointer_size) {
+                                                    PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) == other_cache;
 }
 
-inline bool ArtMethod::HasSameDexCacheResolvedTypes(ArtMethod* other, size_t pointer_size) {
+inline bool ArtMethod::HasSameDexCacheResolvedTypes(ArtMethod* other, PointerSize pointer_size) {
   return GetDexCacheResolvedTypes(pointer_size) == other->GetDexCacheResolvedTypes(pointer_size);
 }
 
 inline mirror::Class* ArtMethod::GetClassFromTypeIndex(uint16_t type_idx,
                                                        bool resolve,
-                                                       size_t ptr_size) {
-  mirror::Class* type = GetDexCacheResolvedType(type_idx, ptr_size);
+                                                       PointerSize pointer_size) {
+  mirror::Class* type = GetDexCacheResolvedType(type_idx, pointer_size);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
     CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
@@ -313,11 +322,11 @@
     return "<runtime internal resolution method>";
   } else if (this == runtime->GetImtConflictMethod()) {
     return "<runtime internal imt conflict method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
     return "<runtime internal callee-save all registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly)) {
     return "<runtime internal callee-save reference registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs)) {
+  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs)) {
     return "<runtime internal callee-save reference and argument registers method>";
   } else {
     return "<unknown runtime internal method>";
@@ -328,9 +337,9 @@
   return GetDeclaringClass()->GetDexFile().GetCodeItem(GetCodeItemOffset());
 }
 
-inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx, size_t ptr_size) {
+inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size) {
   DCHECK(!IsProxyMethod());
-  return GetDexCacheResolvedType(type_idx, ptr_size) != nullptr;
+  return GetDexCacheResolvedType(type_idx, pointer_size) != nullptr;
 }
 
 inline int32_t ArtMethod::GetLineNumFromDexPC(uint32_t dex_pc) {
@@ -395,11 +404,12 @@
   return GetDeclaringClass()->GetDexCache();
 }
 
+template<ReadBarrierOption kReadBarrierOption>
 inline bool ArtMethod::IsProxyMethod() {
-  return GetDeclaringClass()->IsProxyClass();
+  return GetDeclaringClass<kReadBarrierOption>()->IsProxyClass();
 }
 
-inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(size_t pointer_size) {
+inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(PointerSize pointer_size) {
   if (LIKELY(!IsProxyMethod())) {
     return this;
   }
@@ -415,22 +425,24 @@
 }
 
 inline void ArtMethod::SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods,
-                                                  size_t ptr_size) {
-  SetNativePointer(DexCacheResolvedMethodsOffset(ptr_size), new_dex_cache_methods, ptr_size);
+                                                  PointerSize pointer_size) {
+  SetNativePointer(DexCacheResolvedMethodsOffset(pointer_size),
+                   new_dex_cache_methods,
+                   pointer_size);
 }
 
 inline void ArtMethod::SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types,
-                                                size_t ptr_size) {
-  SetNativePointer(DexCacheResolvedTypesOffset(ptr_size), new_dex_cache_types, ptr_size);
+                                                PointerSize pointer_size) {
+  SetNativePointer(DexCacheResolvedTypesOffset(pointer_size), new_dex_cache_types, pointer_size);
 }
 
-inline mirror::Class* ArtMethod::GetReturnType(bool resolve, size_t ptr_size) {
+inline mirror::Class* ArtMethod::GetReturnType(bool resolve, PointerSize pointer_size) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
   uint16_t return_type_idx = proto_id.return_type_idx_;
-  mirror::Class* type = GetDexCacheResolvedType(return_type_idx, ptr_size);
+  mirror::Class* type = GetDexCacheResolvedType(return_type_idx, pointer_size);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(return_type_idx, this);
     CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
@@ -438,24 +450,24 @@
   return type;
 }
 
-template<typename RootVisitorType>
-void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
-  ArtMethod* interface_method = nullptr;
-  mirror::Class* klass = declaring_class_.Read();
-  if (LIKELY(klass != nullptr)) {
+template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
+void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
+  if (LIKELY(!declaring_class_.IsNull())) {
+    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+    mirror::Class* klass = declaring_class_.Read<kReadBarrierOption>();
     if (UNLIKELY(klass->IsProxyClass())) {
       // For normal methods, dex cache shortcuts will be visited through the declaring class.
       // However, for proxies we need to keep the interface method alive, so we visit its roots.
-      interface_method = mirror::DexCache::GetElementPtrSize(
+      ArtMethod* interface_method = mirror::DexCache::GetElementPtrSize(
           GetDexCacheResolvedMethods(pointer_size),
           GetDexMethodIndex(),
           pointer_size);
       DCHECK(interface_method != nullptr);
       DCHECK_EQ(interface_method,
-                Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
+                Runtime::Current()->GetClassLinker()->FindMethodForProxy<kReadBarrierOption>(
+                    klass, this));
       interface_method->VisitRoots(visitor, pointer_size);
     }
-    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
     // We know we don't have profiling information if the class hasn't been verified. Note
     // that this check also ensures the IsNative call can be made, as IsNative expects a fully
     // created class (and not a retired one).
@@ -463,7 +475,7 @@
       // Runtime methods and native methods use the same field as the profiling info for
       // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
       // some runtime methods).
-      if (!IsNative() && !IsRuntimeMethod()) {
+      if (!IsNative<kReadBarrierOption>() && !IsRuntimeMethod()) {
         ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
         if (profiling_info != nullptr) {
           profiling_info->VisitRoots(visitor);
@@ -475,7 +487,7 @@
 
 template <typename Visitor>
 inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor,
-                                                       size_t pointer_size) {
+                                                       PointerSize pointer_size) {
   mirror::Class* old_class = GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   mirror::Class* new_class = visitor(old_class);
   if (old_class != new_class) {
@@ -494,7 +506,7 @@
 }
 
 template <ReadBarrierOption kReadBarrierOption, typename Visitor>
-inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor, size_t pointer_size) {
+inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor, PointerSize pointer_size) {
   if (IsNative<kReadBarrierOption>()) {
     const void* old_native_code = GetEntryPointFromJniPtrSize(pointer_size);
     const void* new_native_code = visitor(old_native_code);
@@ -502,7 +514,7 @@
       SetEntryPointFromJniPtrSize(new_native_code, pointer_size);
     }
   } else {
-    DCHECK(GetEntryPointFromJniPtrSize(pointer_size) == nullptr);
+    DCHECK(GetDataPtrSize(pointer_size) == nullptr);
   }
   const void* old_code = GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
   const void* new_code = visitor(old_code);
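VisitRoots above is templated over an arbitrary RootVisitorType rather than a fixed interface. A hedged sketch of the minimal shape such a visitor needs in this simplified view (a toy root counter, not one of ART's real visitors, whose interfaces carry more context):

    #include <cstddef>

    // Toy visitor: in this sketch the template only requires a VisitRoot overload
    // that accepts the root's address.
    struct CountingRootVisitorSketch {
      size_t count = 0;
      template <typename RootAddress>
      void VisitRoot(RootAddress /* root */) { ++count; }
    };
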
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f86cb13..f9bc249 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -16,6 +16,8 @@
 
 #include "art_method.h"
 
+#include <cstddef>
+
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
@@ -120,7 +122,7 @@
   return dex_file->GetMethodSignature(mid) == dex_file2->GetMethodSignature(mid2);
 }
 
-ArtMethod* ArtMethod::FindOverriddenMethod(size_t pointer_size) {
+ArtMethod* ArtMethod::FindOverriddenMethod(PointerSize pointer_size) {
   if (IsStatic()) {
     return nullptr;
   }
@@ -194,7 +196,7 @@
   // Default to handler not found.
   uint32_t found_dex_pc = DexFile::kDexNoIndex;
   // Iterate over the catch handlers associated with dex_pc.
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (CatchHandlerIterator it(*code_item, dex_pc); it.HasNext(); it.Next()) {
     uint16_t iter_type_idx = it.GetHandlerTypeIndex();
     // Catch all case
@@ -243,7 +245,7 @@
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
-    CHECK_STREQ(GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(), shorty);
+    CHECK_STREQ(GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(), shorty);
   }
 
   // Push a transition back into managed code onto the linked list in thread.
@@ -266,7 +268,7 @@
           self, this, receiver, args + 1, result, /*stay_in_interpreter*/ true);
     }
   } else {
-    DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
 
     constexpr bool kLogInvocationStartAndReturn = false;
     bool have_quick_code = GetEntryPointFromQuickCompiledCode() != nullptr;
@@ -332,6 +334,23 @@
   return GetDeclaringClass()->IsInterface();
 }
 
+bool ArtMethod::IsAnnotatedWithFastNative() {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  StackHandleScope<1> shs(self);
+
+  const DexFile& dex_file = GetDeclaringClass()->GetDexFile();
+
+  mirror::Class* fast_native_annotation =
+      soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_annotation_optimization_FastNative);
+  Handle<mirror::Class> fast_native_handle(shs.NewHandle(fast_native_annotation));
+
+  // Note: Resolves any method annotations' classes as a side-effect.
+  // -- This seems allowed by the spec since it says we can preload any classes
+  //    referenced by another class's constant pool table.
+  return dex_file.IsMethodAnnotationPresent(this, fast_native_handle, DexFile::kDexVisibilityBuild);
+}
+
 bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> params) {
   auto* dex_cache = GetDexCache();
   auto* dex_file = dex_cache->GetDexFile();
@@ -458,7 +477,7 @@
 
   DCHECK(method_header->Contains(pc))
       << PrettyMethod(this)
-      << std::hex << pc << " " << oat_entry_point
+      << " " << std::hex << pc << " " << oat_entry_point
       << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
   return method_header;
 }
@@ -474,7 +493,7 @@
   return Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(this) != nullptr;
 }
 
-void ArtMethod::CopyFrom(ArtMethod* src, size_t image_pointer_size) {
+void ArtMethod::CopyFrom(ArtMethod* src, PointerSize image_pointer_size) {
   memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
          Size(image_pointer_size));
   declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
@@ -497,4 +516,26 @@
   hotness_count_ = 0;
 }
 
+bool ArtMethod::IsImagePointerSize(PointerSize pointer_size) {
+  // Hijack this function to get access to PtrSizedFieldsOffset.
+  //
+  // Ensure that PtrSizedFieldsOffset is correct. We rely here on usually having both 32-bit and
+  // 64-bit builds.
+  static_assert(std::is_standard_layout<ArtMethod>::value, "ArtMethod is not standard layout.");
+  static_assert(
+      (sizeof(void*) != 4) ||
+          (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(PointerSize::k32)),
+      "Unexpected 32-bit class layout.");
+  static_assert(
+      (sizeof(void*) != 8) ||
+          (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(PointerSize::k64)),
+      "Unexpected 64-bit class layout.");
+
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr) {
+    return true;
+  }
+  return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size;
+}
+
 }  // namespace art
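The guarded static_asserts in IsImagePointerSize mean each build only verifies the layout for its own pointer width, which is why the comment relies on usually having both 32-bit and 64-bit builds. The same pattern on a self-contained toy struct (the offsets here assume common ABIs and are invented for the example):

    #include <cstddef>
    #include <cstdint>
    #include <type_traits>

    struct Packed {            // stands in for ArtMethod
      uint32_t flags;
      void* ptr_fields;        // stands in for ptr_sized_fields_
    };

    static_assert(std::is_standard_layout<Packed>::value, "offsetof requires standard layout");
    static_assert((sizeof(void*) != 4) || (offsetof(Packed, ptr_fields) == 4), "32-bit layout");
    static_assert((sizeof(void*) != 8) || (offsetof(Packed, ptr_fields) == 8), "64-bit layout");
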
diff --git a/runtime/art_method.h b/runtime/art_method.h
index b65cb23..a90ef23 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,8 +17,11 @@
 #ifndef ART_RUNTIME_ART_METHOD_H_
 #define ART_RUNTIME_ART_METHOD_H_
 
+#include <cstddef>
+
 #include "base/bit_utils.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "invoke_type.h"
@@ -63,7 +66,7 @@
   ImtConflictTable(ImtConflictTable* other,
                    ArtMethod* interface_method,
                    ArtMethod* implementation_method,
-                   size_t pointer_size) {
+                   PointerSize pointer_size) {
     const size_t count = other->NumEntries(pointer_size);
     for (size_t i = 0; i < count; ++i) {
       SetInterfaceMethod(i, pointer_size, other->GetInterfaceMethod(i, pointer_size));
@@ -77,30 +80,30 @@
   }
 
   // num_entries excludes the header.
-  ImtConflictTable(size_t num_entries, size_t pointer_size) {
+  ImtConflictTable(size_t num_entries, PointerSize pointer_size) {
     SetInterfaceMethod(num_entries, pointer_size, nullptr);
     SetImplementationMethod(num_entries, pointer_size, nullptr);
   }
 
   // Set an entry at an index.
-  void SetInterfaceMethod(size_t index, size_t pointer_size, ArtMethod* method) {
+  void SetInterfaceMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
     SetMethod(index * kMethodCount + kMethodInterface, pointer_size, method);
   }
 
-  void SetImplementationMethod(size_t index, size_t pointer_size, ArtMethod* method) {
+  void SetImplementationMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
     SetMethod(index * kMethodCount + kMethodImplementation, pointer_size, method);
   }
 
-  ArtMethod* GetInterfaceMethod(size_t index, size_t pointer_size) const {
+  ArtMethod* GetInterfaceMethod(size_t index, PointerSize pointer_size) const {
     return GetMethod(index * kMethodCount + kMethodInterface, pointer_size);
   }
 
-  ArtMethod* GetImplementationMethod(size_t index, size_t pointer_size) const {
+  ArtMethod* GetImplementationMethod(size_t index, PointerSize pointer_size) const {
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
   // Return true if two conflict tables are the same.
-  bool Equals(ImtConflictTable* other, size_t pointer_size) const {
+  bool Equals(ImtConflictTable* other, PointerSize pointer_size) const {
     size_t num = NumEntries(pointer_size);
     if (num != other->NumEntries(pointer_size)) {
       return false;
@@ -119,7 +122,7 @@
   // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
   // and also returns one. The order is <interface, implementation>.
   template<typename Visitor>
-  void Visit(const Visitor& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS {
+  void Visit(const Visitor& visitor, PointerSize pointer_size) NO_THREAD_SAFETY_ANALYSIS {
     uint32_t table_index = 0;
     for (;;) {
       ArtMethod* interface_method = GetInterfaceMethod(table_index, pointer_size);
@@ -141,7 +144,7 @@
 
   // Lookup the implementation ArtMethod associated to `interface_method`. Return null
   // if not found.
-  ArtMethod* Lookup(ArtMethod* interface_method, size_t pointer_size) const {
+  ArtMethod* Lookup(ArtMethod* interface_method, PointerSize pointer_size) const {
     uint32_t table_index = 0;
     for (;;) {
       ArtMethod* current_interface_method = GetInterfaceMethod(table_index, pointer_size);
@@ -157,7 +160,7 @@
   }
 
   // Compute the number of entries in this table.
-  size_t NumEntries(size_t pointer_size) const {
+  size_t NumEntries(PointerSize pointer_size) const {
     uint32_t table_index = 0;
     while (GetInterfaceMethod(table_index, pointer_size) != nullptr) {
       ++table_index;
@@ -166,41 +169,39 @@
   }
 
   // Compute the size in bytes taken by this table.
-  size_t ComputeSize(size_t pointer_size) const {
+  size_t ComputeSize(PointerSize pointer_size) const {
     // Add the end marker.
     return ComputeSize(NumEntries(pointer_size), pointer_size);
   }
 
   // Compute the size in bytes needed for copying the given `table` and add
   // one more entry.
-  static size_t ComputeSizeWithOneMoreEntry(ImtConflictTable* table, size_t pointer_size) {
+  static size_t ComputeSizeWithOneMoreEntry(ImtConflictTable* table, PointerSize pointer_size) {
     return table->ComputeSize(pointer_size) + EntrySize(pointer_size);
   }
 
   // Compute size with a fixed number of entries.
-  static size_t ComputeSize(size_t num_entries, size_t pointer_size) {
+  static size_t ComputeSize(size_t num_entries, PointerSize pointer_size) {
     return (num_entries + 1) * EntrySize(pointer_size);  // Add one for null terminator.
   }
 
-  static size_t EntrySize(size_t pointer_size) {
-    return pointer_size * static_cast<size_t>(kMethodCount);
+  static size_t EntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size) * static_cast<size_t>(kMethodCount);
   }
 
  private:
-  ArtMethod* GetMethod(size_t index, size_t pointer_size) const {
-    if (pointer_size == 8) {
+  ArtMethod* GetMethod(size_t index, PointerSize pointer_size) const {
+    if (pointer_size == PointerSize::k64) {
       return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data64_[index]));
     } else {
-      DCHECK_EQ(pointer_size, 4u);
       return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data32_[index]));
     }
   }
 
-  void SetMethod(size_t index, size_t pointer_size, ArtMethod* method) {
-    if (pointer_size == 8) {
+  void SetMethod(size_t index, PointerSize pointer_size, ArtMethod* method) {
+    if (pointer_size == PointerSize::k64) {
       data64_[index] = dchecked_integral_cast<uint64_t>(reinterpret_cast<uintptr_t>(method));
     } else {
-      DCHECK_EQ(pointer_size, 4u);
       data32_[index] = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(method));
     }
   }
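Stripped of the packed 32/64-bit encoding handled by GetMethod/SetMethod above, the conflict table is a flat array of <interface, implementation> pairs terminated by a null interface method. A hedged sketch of the walk that Lookup and NumEntries perform:

    #include <cstddef>

    struct Method;                 // stands in for ArtMethod

    struct EntrySketch {
      Method* interface_method;    // null in the final, terminating entry
      Method* implementation;
    };

    Method* LookupSketch(const EntrySketch* entries, const Method* interface_method) {
      for (size_t i = 0; entries[i].interface_method != nullptr; ++i) {
        if (entries[i].interface_method == interface_method) {
          return entries[i].implementation;
        }
      }
      return nullptr;              // Not found: the caller falls back to slower dispatch.
    }
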
@@ -219,9 +220,9 @@
 class ArtMethod FINAL {
  public:
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
-      method_index_(0) { }
+      method_index_(0), hotness_count_(0) { }
 
-  ArtMethod(ArtMethod* src, size_t image_pointer_size) {
+  ArtMethod(ArtMethod* src, PointerSize image_pointer_size) {
     CopyFrom(src, image_pointer_size);
   }
 
@@ -356,6 +357,7 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool SkipAccessChecks() {
@@ -373,6 +375,10 @@
     return (GetAccessFlags() & kAccMustCountLocks) != 0;
   }
 
+  // Checks whether the method was annotated with @dalvik.annotation.optimization.FastNative,
+  // independent of the kAccFastNative access flag.
+  bool IsAnnotatedWithFastNative();
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -418,47 +424,52 @@
 
   ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetDexMethodIndex(uint32_t new_idx) {
     // Not called within a transaction.
     dex_method_index_ = new_idx;
   }
 
-  ALWAYS_INLINE ArtMethod** GetDexCacheResolvedMethods(size_t pointer_size)
+  ALWAYS_INLINE ArtMethod** GetDexCacheResolvedMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_index, size_t ptr_size)
+  ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_index,
+                                                     PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ALWAYS_INLINE void SetDexCacheResolvedMethod(uint16_t method_index,
                                                ArtMethod* new_method,
-                                               size_t ptr_size)
+                                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE void SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods, size_t ptr_size)
+  ALWAYS_INLINE void SetDexCacheResolvedMethods(ArtMethod** new_dex_cache_methods,
+                                                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasDexCacheResolvedMethods(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedMethods(ArtMethod* other, size_t pointer_size)
+  bool HasDexCacheResolvedMethods(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedMethods(ArtMethod* other, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedMethods(ArtMethod** other_cache, size_t pointer_size)
+  bool HasSameDexCacheResolvedMethods(ArtMethod** other_cache, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool kWithCheck = true>
-  mirror::Class* GetDexCacheResolvedType(uint32_t type_idx, size_t ptr_size)
+  mirror::Class* GetDexCacheResolvedType(uint32_t type_idx, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types, size_t ptr_size)
+  void SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types,
+                                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasDexCacheResolvedTypes(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedTypes(ArtMethod* other, size_t pointer_size)
+  bool HasDexCacheResolvedTypes(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedTypes(ArtMethod* other, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache, size_t pointer_size)
+  bool HasSameDexCacheResolvedTypes(GcRoot<mirror::Class>* other_cache, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the Class* from the type index into this method's dex cache.
-  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
+  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if this method has the same name and signature of the other method.
   bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method that this method overrides.
-  ArtMethod* FindOverriddenMethod(size_t pointer_size)
+  ArtMethod* FindOverriddenMethod(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method index for this method within other_dexfile. If this method isn't present then
@@ -473,21 +484,22 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   const void* GetEntryPointFromQuickCompiledCode() {
-    return GetEntryPointFromQuickCompiledCodePtrSize(sizeof(void*));
+    return GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
   }
-  ALWAYS_INLINE const void* GetEntryPointFromQuickCompiledCodePtrSize(size_t pointer_size) {
+  ALWAYS_INLINE const void* GetEntryPointFromQuickCompiledCodePtrSize(PointerSize pointer_size) {
     return GetNativePointer<const void*>(
         EntryPointFromQuickCompiledCodeOffset(pointer_size), pointer_size);
   }
 
   void SetEntryPointFromQuickCompiledCode(const void* entry_point_from_quick_compiled_code) {
     SetEntryPointFromQuickCompiledCodePtrSize(entry_point_from_quick_compiled_code,
-                                              sizeof(void*));
+                                              kRuntimePointerSize);
   }
   ALWAYS_INLINE void SetEntryPointFromQuickCompiledCodePtrSize(
-      const void* entry_point_from_quick_compiled_code, size_t pointer_size) {
+      const void* entry_point_from_quick_compiled_code, PointerSize pointer_size) {
     SetNativePointer(EntryPointFromQuickCompiledCodeOffset(pointer_size),
-                     entry_point_from_quick_compiled_code, pointer_size);
+                     entry_point_from_quick_compiled_code,
+                     pointer_size);
   }
 
   void RegisterNative(const void* native_method, bool is_fast)
@@ -495,66 +507,86 @@
 
   void UnregisterNative() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset DexCacheResolvedMethodsOffset(size_t pointer_size) {
+  static MemberOffset DexCacheResolvedMethodsOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, dex_cache_resolved_methods_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, dex_cache_resolved_methods_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset DexCacheResolvedTypesOffset(size_t pointer_size) {
+  static MemberOffset DexCacheResolvedTypesOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+  static MemberOffset DataOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, data_) / sizeof(void*) * static_cast<size_t>(pointer_size));
   }
 
-  static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) {
+  static MemberOffset EntryPointFromJniOffset(PointerSize pointer_size) {
+    return DataOffset(pointer_size);
+  }
+
+  static MemberOffset EntryPointFromQuickCompiledCodeOffset(PointerSize pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*)
+            * static_cast<size_t>(pointer_size));
   }
 
-  ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
-    return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
-  }
-
-  ImtConflictTable* GetImtConflictTable(size_t pointer_size) {
+  ImtConflictTable* GetImtConflictTable(PointerSize pointer_size) {
     DCHECK(IsRuntimeMethod());
-    return reinterpret_cast<ImtConflictTable*>(GetEntryPointFromJniPtrSize(pointer_size));
+    return reinterpret_cast<ImtConflictTable*>(GetDataPtrSize(pointer_size));
   }
 
-  ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, size_t pointer_size) {
-    SetEntryPointFromJniPtrSize(table, pointer_size);
+  ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, PointerSize pointer_size) {
+    DCHECK(IsRuntimeMethod());
+    SetDataPtrSize(table, pointer_size);
+  }
+
+  ProfilingInfo* GetProfilingInfo(PointerSize pointer_size) {
+    return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size));
   }
 
   ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
-    SetEntryPointFromJniPtrSize(info, sizeof(void*));
+    SetDataPtrSize(info, kRuntimePointerSize);
   }
 
-  ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, size_t pointer_size) {
-    SetEntryPointFromJniPtrSize(info, pointer_size);
+  ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, PointerSize pointer_size) {
+    SetDataPtrSize(info, pointer_size);
   }
 
   static MemberOffset ProfilingInfoOffset() {
-    return EntryPointFromJniOffset(sizeof(void*));
+    DCHECK(IsImagePointerSize(kRuntimePointerSize));
+    return DataOffset(kRuntimePointerSize);
   }
 
   void* GetEntryPointFromJni() {
-    return GetEntryPointFromJniPtrSize(sizeof(void*));
+    DCHECK(IsNative());
+    return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
   }
 
-  ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
-    return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
+  ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(PointerSize pointer_size) {
+    return GetDataPtrSize(pointer_size);
   }
 
   void SetEntryPointFromJni(const void* entrypoint) {
     DCHECK(IsNative());
-    SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
+    SetEntryPointFromJniPtrSize(entrypoint, kRuntimePointerSize);
   }
 
-  ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
-    SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
+  ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, PointerSize pointer_size) {
+    SetDataPtrSize(entrypoint, pointer_size);
+  }
+
+  ALWAYS_INLINE void* GetDataPtrSize(PointerSize pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    return GetNativePointer<void*>(DataOffset(pointer_size), pointer_size);
+  }
+
+  ALWAYS_INLINE void SetDataPtrSize(const void* data, PointerSize pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    SetNativePointer(DataOffset(pointer_size), data, pointer_size);
   }
 
   // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
@@ -580,8 +612,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
-  template<typename RootVisitorType>
-  void VisitRoots(RootVisitorType& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS;
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename RootVisitorType>
+  void VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) NO_THREAD_SAFETY_ANALYSIS;
 
   const DexFile* GetDexFile() SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -602,7 +634,8 @@
 
   const DexFile::CodeItem* GetCodeItem() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsResolvedTypeIdx(uint16_t type_idx, size_t ptr_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   int32_t GetLineNumFromDexPC(uint32_t dex_pc) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -623,14 +656,14 @@
 
   // May cause thread suspension due to GetClassFromTypeIdx calling ResolveType this caused a large
   // number of bugs at call sites.
-  mirror::Class* GetReturnType(bool resolve, size_t ptr_size)
+  mirror::Class* GetReturnType(bool resolve, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::ClassLoader* GetClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // May cause thread suspension due to class resolution.
@@ -638,22 +671,22 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Size of an instance of this native class.
-  static size_t Size(size_t pointer_size) {
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size) +
-        (sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size;
+  static size_t Size(PointerSize pointer_size) {
+    return PtrSizedFieldsOffset(pointer_size) +
+        (sizeof(PtrSizedFields) / sizeof(void*)) * static_cast<size_t>(pointer_size);
   }
 
   // Alignment of an instance of this native class.
-  static size_t Alignment(size_t pointer_size) {
+  static size_t Alignment(PointerSize pointer_size) {
     // The ArtMethod alignment is the same as image pointer size. This differs from
     // alignof(ArtMethod) if cross-compiling with pointer_size != sizeof(void*).
-    return pointer_size;
+    return static_cast<size_t>(pointer_size);
   }
 
-  void CopyFrom(ArtMethod* src, size_t image_pointer_size)
+  void CopyFrom(ArtMethod* src, PointerSize image_pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
+  ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Note, hotness_counter_ updates are non-atomic but it doesn't need to be precise.  Also,
@@ -689,12 +722,13 @@
   // Update heap objects and non-entrypoint pointers by the passed in visitor for image relocation.
   // Does not use read barrier.
   template <typename Visitor>
-  ALWAYS_INLINE void UpdateObjectsForImageRelocation(const Visitor& visitor, size_t pointer_size)
+  ALWAYS_INLINE void UpdateObjectsForImageRelocation(const Visitor& visitor,
+                                                     PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update entry points by passing them through the visitor.
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
-  ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor, size_t pointer_size);
+  ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor, PointerSize pointer_size);
 
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
@@ -726,9 +760,7 @@
   // Fake padding field gets inserted here.
 
   // Must be the last fields in the method.
-  // PACKED(4) is necessary for the correctness of
-  // RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size).
-  struct PACKED(4) PtrSizedFields {
+  struct PtrSizedFields {
     // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
     ArtMethod** dex_cache_resolved_methods_;
 
@@ -737,7 +769,7 @@
 
     // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
     // or the profiling data for non-native methods, or an ImtConflictTable.
-    void* entry_point_from_jni_;
+    void* data_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
     // the interpreter.
@@ -745,17 +777,20 @@
   } ptr_sized_fields_;
 
  private:
-  static size_t PtrSizedFieldsOffset(size_t pointer_size) {
-    // Round up to pointer size for padding field.
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size);
+  static constexpr size_t PtrSizedFieldsOffset(PointerSize pointer_size) {
+    // Round up to pointer size for padding field. Tested in art_method.cc.
+    return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_),
+                   static_cast<size_t>(pointer_size));
   }
 
+  // Compare given pointer size to the image pointer size.
+  static bool IsImagePointerSize(PointerSize pointer_size);
+
   template<typename T>
-  ALWAYS_INLINE T GetNativePointer(MemberOffset offset, size_t pointer_size) const {
+  ALWAYS_INLINE T GetNativePointer(MemberOffset offset, PointerSize pointer_size) const {
     static_assert(std::is_pointer<T>::value, "T must be a pointer type");
-    DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
     const auto addr = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-    if (pointer_size == sizeof(uint32_t)) {
+    if (pointer_size == PointerSize::k32) {
       return reinterpret_cast<T>(*reinterpret_cast<const uint32_t*>(addr));
     } else {
       auto v = *reinterpret_cast<const uint64_t*>(addr);
@@ -764,11 +799,10 @@
   }
 
   template<typename T>
-  ALWAYS_INLINE void SetNativePointer(MemberOffset offset, T new_value, size_t pointer_size) {
+  ALWAYS_INLINE void SetNativePointer(MemberOffset offset, T new_value, PointerSize pointer_size) {
     static_assert(std::is_pointer<T>::value, "T must be a pointer type");
-    DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
     const auto addr = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-    if (pointer_size == sizeof(uint32_t)) {
+    if (pointer_size == PointerSize::k32) {
       uintptr_t ptr = reinterpret_cast<uintptr_t>(new_value);
       *reinterpret_cast<uint32_t*>(addr) = dchecked_integral_cast<uint32_t>(ptr);
     } else {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 21725d3..102b993 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,11 +19,15 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
+#include "base/bit_utils.h"
 #include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
 #include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
+#include "mirror/dex_cache.h"
 #include "mirror/string.h"
+#include "utils/dex_cache_arrays_layout.h"
 #include "runtime.h"
 #include "thread.h"
 #endif
@@ -57,112 +61,70 @@
 
 #if defined(__LP64__)
 #define POINTER_SIZE_SHIFT 3
+#define POINTER_SIZE art::PointerSize::k64
 #else
 #define POINTER_SIZE_SHIFT 2
+#define POINTER_SIZE art::PointerSize::k32
 #endif
 ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
             static_cast<size_t>(__SIZEOF_POINTER__))
 
-// Size of references to the heap on the stack.
-#define STACK_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
-
-// Size of heap references
-#define COMPRESSED_REFERENCE_SIZE 4
-ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
-            sizeof(art::mirror::CompressedReference<art::mirror::Object>))
-
-#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
-ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
-            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
-
-// Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kSaveAll))
-
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsOnly))
-
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET (2 * 8)
-ADD_TEST_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET),
-            art::Runtime::GetCalleeSaveMethodOffset(art::Runtime::kRefsAndArgs))
-
-// Offset of field Thread::tls32_.state_and_flags.
-#define THREAD_FLAGS_OFFSET 0
-ADD_TEST_EQ(THREAD_FLAGS_OFFSET,
-            art::Thread::ThreadFlagsOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tls32_.thin_lock_thread_id.
-#define THREAD_ID_OFFSET 12
-ADD_TEST_EQ(THREAD_ID_OFFSET,
-            art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tls32_.is_gc_marking.
-#define THREAD_IS_GC_MARKING_OFFSET 52
-ADD_TEST_EQ(THREAD_IS_GC_MARKING_OFFSET,
-            art::Thread::IsGcMarkingOffset<__SIZEOF_POINTER__>().Int32Value())
-
-// Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 128
-ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
-            art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
+// Import platform-independent constant defines from our autogenerated list.
+// Export new defines (for assembly use) by editing cpp-define-generator def files.
+#define DEFINE_CHECK_EQ ADD_TEST_EQ
+#include "generated/asm_support_gen.h"
 
 // Offset of field Thread::tlsPtr_.exception.
 #define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_EXCEPTION_OFFSET,
-            art::Thread::ExceptionOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ExceptionOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.managed_stack.top_quick_frame_.
 #define THREAD_TOP_QUICK_FRAME_OFFSET (THREAD_CARD_TABLE_OFFSET + (3 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_TOP_QUICK_FRAME_OFFSET,
-            art::Thread::TopOfManagedStackOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::TopOfManagedStackOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.self.
 #define THREAD_SELF_OFFSET (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
-            art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::SelfOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
-            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + 2 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
-            art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalPosOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
-            art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
-            art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
-#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_CURRENT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
-            art::Thread::MterpDefaultIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::MterpDefaultIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_alt_ibase.
-#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
+#define THREAD_ALT_IBASE_OFFSET (THREAD_DEFAULT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
-            art::Thread::MterpAltIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::MterpAltIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_ALT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
-            art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::RosAllocRunsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
 #define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 16 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
-            art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalAllocStackTopOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
 #define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
-            art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
+            art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value())
 
 // Offsets within ShadowFrame.
 #define SHADOWFRAME_LINK_OFFSET 0
@@ -199,17 +161,6 @@
 ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
 
-// Offsets within CodeItem
-#define CODEITEM_INSNS_OFFSET 16
-ADD_TEST_EQ(CODEITEM_INSNS_OFFSET,
-            static_cast<int32_t>(OFFSETOF_MEMBER(art::DexFile::CodeItem, insns_)))
-
-// Offsets within java.lang.Object.
-#define MIRROR_OBJECT_CLASS_OFFSET 0
-ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
-#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
-ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
-
 #if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
 #else
@@ -218,28 +169,25 @@
 ADD_TEST_EQ(size_t(MIRROR_OBJECT_HEADER_SIZE), sizeof(art::mirror::Object))
 
 // Offsets within java.lang.Class.
-#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (36 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
+            art::mirror::Class::PrimitiveTypeOffset().Int32Value())
+#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
-#define MIRROR_CLASS_STATUS_INITIALIZED 10
-ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
-            static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
-#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
-ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
-            static_cast<uint32_t>(art::kAccClassIsFinalizable))
-#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 31
-ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
-            static_cast<uint32_t>(1U << ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT))
+#define PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT 16
+ADD_TEST_EQ(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT,
+            static_cast<int>(art::mirror::Class::kPrimitiveTypeSizeShiftShift))
 
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
@@ -289,118 +237,7 @@
 #define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
-// Offsets within java.lang.reflect.ArtMethod.
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(4).Int32Value())
 
-#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedMethodsOffset(8).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32,
-            art::ArtMethod::DexCacheResolvedTypesOffset(4).Int32Value())
-
-#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
-ADD_TEST_EQ(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64,
-            art::ArtMethod::DexCacheResolvedTypesOffset(8).Int32Value())
-
-#define ART_METHOD_JNI_OFFSET_32 28
-ADD_TEST_EQ(ART_METHOD_JNI_OFFSET_32,
-            art::ArtMethod::EntryPointFromJniOffset(4).Int32Value())
-
-#define ART_METHOD_JNI_OFFSET_64 40
-ADD_TEST_EQ(ART_METHOD_JNI_OFFSET_64,
-            art::ArtMethod::EntryPointFromJniOffset(8).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_32 32
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
-
-#define ART_METHOD_QUICK_CODE_OFFSET_64 48
-ADD_TEST_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
-            art::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
-
-#define LOCK_WORD_STATE_SHIFT 30
-ADD_TEST_EQ(LOCK_WORD_STATE_SHIFT, static_cast<int32_t>(art::LockWord::kStateShift))
-
-#define LOCK_WORD_STATE_MASK 0xC0000000
-ADD_TEST_EQ(LOCK_WORD_STATE_MASK, static_cast<uint32_t>(art::LockWord::kStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_SHIFT,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateShift))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK,
-            static_cast<int32_t>(art::LockWord::kReadBarrierStateMaskShifted))
-
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xCFFFFFFF
-ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED,
-            static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled))
-
-#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
-ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
-
-#define OBJECT_ALIGNMENT_MASK 7
-ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
-
-#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8
-ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
-            ~static_cast<uint32_t>(art::kObjectAlignment - 1))
-
-#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
-ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
-ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
-ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
-            ~static_cast<uint32_t>(
-                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
-ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
-            ~static_cast<uint64_t>(
-                art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
-
-#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
-
-#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
-ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
-
-#define ROSALLOC_SLOT_NEXT_OFFSET 0
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
-            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
-// Assert this so that we can avoid zeroing the next field by installing the class pointer.
-ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
-
-#define THREAD_SUSPEND_REQUEST 1
-ADD_TEST_EQ(THREAD_SUSPEND_REQUEST, static_cast<int32_t>(art::kSuspendRequest))
-
-#define THREAD_CHECKPOINT_REQUEST 2
-ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
-
-#define JIT_CHECK_OSR -1
-ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
-
-#define JIT_HOTNESS_DISABLE -2
-ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
 
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
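Editor's note: the asm_support.h hunks above replace a long list of hand-maintained offset constants with an include of the generated asm_support_gen.h, and switch the remaining hand-written entries from __SIZEOF_POINTER__ to POINTER_SIZE. The purpose of ADD_TEST_EQ is that every constant hard-coded for assembly stubs is cross-checked against the real C++ offset. Below is a minimal sketch of that cross-checking idea using static_assert and hypothetical names (CheckedThread, MY_THREAD_SELF_OFFSET); the real ADD_TEST_EQ machinery in asm_support.h is wired up differently.

#include <cstddef>

// Hypothetical thread layout standing in for art::Thread (illustration only).
struct CheckedThread {
  void* card_table;
  void* exception;
  void* self;
};

// Constant an assembly stub would hard-code for the "self" slot.
// __SIZEOF_POINTER__ is the GCC/Clang predefined macro also used in the patch.
#define MY_THREAD_SELF_OFFSET (2 * __SIZEOF_POINTER__)

// Cross-check: if the layout drifts, the build fails here instead of the
// assembly silently loading the wrong field.
static_assert(MY_THREAD_SELF_OFFSET == offsetof(CheckedThread, self),
              "assembly constant out of sync with C++ layout");

int main() { return 0; }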
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index cea7046..e48eca9 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -52,7 +52,6 @@
   kAllocatorTagMonitorList,
   kAllocatorTagClassTable,
   kAllocatorTagInternTable,
-  kAllocatorTagLambdaBoxTable,
   kAllocatorTagMaps,
   kAllocatorTagLOS,
   kAllocatorTagSafeMap,
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index b84e29f..aeb990c 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -163,6 +163,7 @@
 MallocArena::MallocArena(size_t size) {
   memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
   CHECK(memory_ != nullptr);  // Abort on OOM.
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kAlignment);
   size_ = size;
 }
 
@@ -370,6 +371,7 @@
     arena_head_ = new_arena;
     // Update our internal data structures.
     begin_ = new_arena->Begin();
+    DCHECK_ALIGNED(begin_, kAlignment);
     ptr_ = begin_ + bytes;
     end_ = new_arena->End();
   }
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 6c1a898..3fad96b 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -310,6 +310,7 @@
       return AllocFromNewArena(bytes);
     }
     uint8_t* ret = ptr_;
+    DCHECK_ALIGNED(ret, kAlignment);
     ptr_ += bytes;
     return ret;
   }
@@ -319,20 +320,24 @@
                 ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
     DCHECK_GE(new_size, ptr_size);
     DCHECK_EQ(ptr == nullptr, ptr_size == 0u);
-    auto* end = reinterpret_cast<uint8_t*>(ptr) + ptr_size;
+    // We always allocate aligned.
+    const size_t aligned_ptr_size = RoundUp(ptr_size, kAlignment);
+    auto* end = reinterpret_cast<uint8_t*>(ptr) + aligned_ptr_size;
     // If we haven't allocated anything else, we can safely extend.
     if (end == ptr_) {
       DCHECK(!IsRunningOnMemoryTool());  // Red zone prevents end == ptr_.
-      const size_t size_delta = new_size - ptr_size;
+      const size_t aligned_new_size = RoundUp(new_size, kAlignment);
+      const size_t size_delta = aligned_new_size - aligned_ptr_size;
       // Check remain space.
       const size_t remain = end_ - ptr_;
       if (remain >= size_delta) {
         ptr_ += size_delta;
         ArenaAllocatorStats::RecordAlloc(size_delta, kind);
+        DCHECK_ALIGNED(ptr_, kAlignment);
         return ptr;
       }
     }
-    auto* new_ptr = Alloc(new_size, kind);
+    auto* new_ptr = Alloc(new_size, kind);  // Note: Alloc will take care of aligning new_size.
     memcpy(new_ptr, ptr, ptr_size);
     // TODO: Call free on ptr if linear alloc supports free.
     return new_ptr;
@@ -362,11 +367,12 @@
 
   bool Contains(const void* ptr) const;
 
+  static constexpr size_t kAlignment = 8;
+
  private:
   void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
   uint8_t* AllocFromNewArena(size_t bytes);
 
-  static constexpr size_t kAlignment = 8;
 
   void UpdateBytesAllocated();
 
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
index 9de3cc4..9932586 100644
--- a/runtime/base/arena_allocator_test.cc
+++ b/runtime/base/arena_allocator_test.cc
@@ -124,4 +124,140 @@
   }
 }
 
+TEST_F(ArenaAllocatorTest, AllocAlignment) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  for (size_t iterations = 0; iterations <= 10; ++iterations) {
+    for (size_t size = 1; size <= ArenaAllocator::kAlignment + 1; ++size) {
+      void* allocation = arena.Alloc(size);
+      EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(allocation))
+          << reinterpret_cast<uintptr_t>(allocation);
+    }
+  }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocAlignment) {
+  {
+    // Case 1: small aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 2: small aligned allocation, non-aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 3: small non-aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 4;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  // The next part is brittle, as the default size for an arena is variable, and we don't know
+  // whether a memory tool is adding red zones, which changes how much of the arena is usable.
+
+  {
+    // Case 5: large allocation, aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect new buffer.
+    EXPECT_NE(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 6: large allocation, non-aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize -
+        ArenaAllocator::kAlignment * 4 -
+        ArenaAllocator::kAlignment / 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize +
+        ArenaAllocator::kAlignment * 2 +
+        ArenaAllocator::kAlignment / 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect new buffer.
+    EXPECT_NE(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+}
+
+
 }  // namespace art
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index e2d4c24..68cacd5 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -20,6 +20,7 @@
 #include <deque>
 #include <queue>
 #include <set>
+#include <stack>
 #include <utility>
 
 #include "arena_allocator.h"
@@ -54,6 +55,12 @@
 using ArenaVector = dchecked_vector<T, ArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
+using ArenaPriorityQueue = std::priority_queue<T, ArenaVector<T>, Comparator>;
+
+template <typename T>
+using ArenaStdStack = std::stack<T, ArenaDeque<T>>;
+
+template <typename T, typename Comparator = std::less<T>>
 using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>;
 
 template <typename K, typename V, typename Comparator = std::less<K>>
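Editor's note: ArenaPriorityQueue and ArenaStdStack added above are plain std::priority_queue and std::stack adapters whose backing container is arena-allocated; the adapters themselves are unchanged standard library types. A generic sketch with ordinary std containers standing in for ArenaVector and ArenaDeque.

#include <deque>
#include <iostream>
#include <queue>
#include <stack>
#include <vector>

int main() {
  // std::priority_queue and std::stack are container adapters: the second
  // template argument picks the backing container, which is all the Arena*
  // aliases change.
  std::priority_queue<int, std::vector<int>> queue;  // ArenaPriorityQueue uses ArenaVector here.
  std::stack<int, std::deque<int>> stack;            // ArenaStdStack uses ArenaDeque here.

  queue.push(3);
  queue.push(7);
  stack.push(42);

  std::cout << queue.top() << " " << stack.top() << "\n";  // Prints "7 42".
  return 0;
}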
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
index 19ad302..32283d0 100644
--- a/runtime/base/array_slice.h
+++ b/runtime/base/array_slice.h
@@ -129,6 +129,10 @@
     return element_size_;
   }
 
+  bool Contains(const T* element) const {
+    return &AtUnchecked(0) <= element && element < &AtUnchecked(size_);
+  }
+
  private:
   T& AtUnchecked(size_t index) {
     return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
diff --git a/runtime/base/enums.h b/runtime/base/enums.h
new file mode 100644
index 0000000..51b86ea
--- /dev/null
+++ b/runtime/base/enums.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ENUMS_H_
+#define ART_RUNTIME_BASE_ENUMS_H_
+
+#include <cstddef>
+#include <ostream>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace art {
+
+enum class PointerSize : size_t {
+  k32 = 4,
+  k64 = 8
+};
+std::ostream& operator<<(std::ostream& os, const PointerSize& rhs);
+
+static constexpr PointerSize kRuntimePointerSize = sizeof(void*) == 8U
+                                                       ? PointerSize::k64
+                                                       : PointerSize::k32;
+
+template <typename T>
+static constexpr PointerSize ConvertToPointerSize(T any) {
+  if (any == 4 || any == 8) {
+    return static_cast<PointerSize>(any);
+  } else {
+    LOG(FATAL);
+    UNREACHABLE();
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ENUMS_H_
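Editor's note: the new enums.h turns the raw 4-or-8 pointer-size values threaded through the class linker into a scoped enum, so a pointer size can no longer be silently confused with an arbitrary byte count. A standalone sketch of the idea; the enum mirrors the header above, while FieldOffset is only an illustrative caller, not an ART function.

#include <cstddef>
#include <iostream>

enum class PointerSize : size_t {
  k32 = 4,
  k64 = 8
};

constexpr PointerSize kRuntimePointerSize =
    sizeof(void*) == 8u ? PointerSize::k64 : PointerSize::k32;

// Callers that previously took "size_t pointer_size" now take the enum and
// convert back to bytes explicitly where arithmetic is needed.
size_t FieldOffset(size_t index, PointerSize pointer_size) {
  return index * static_cast<size_t>(pointer_size);
}

int main() {
  std::cout << FieldOffset(3, kRuntimePointerSize) << "\n";  // 12 on 32-bit, 24 on 64-bit.
  // FieldOffset(3, 8);  // Would no longer compile: 8 is not a PointerSize.
  return 0;
}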
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
index 9756338..de6f423 100644
--- a/runtime/base/file_magic.cc
+++ b/runtime/base/file_magic.cc
@@ -21,27 +21,28 @@
 #include <sys/types.h>
 
 #include "base/logging.h"
+#include "base/unix_file/fd_file.h"
 #include "dex_file.h"
 #include "stringprintf.h"
 
 namespace art {
 
-ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
   CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
+  File fd(filename, O_RDONLY, /* check_usage */ false);
+  if (fd.Fd() == -1) {
     *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return ScopedFd();
+    return File();
   }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
+  int n = TEMP_FAILURE_RETRY(read(fd.Fd(), magic, sizeof(*magic)));
   if (n != sizeof(*magic)) {
     *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return ScopedFd();
+    return File();
   }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
+  if (lseek(fd.Fd(), 0, SEEK_SET) != 0) {
     *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
                               strerror(errno));
-    return ScopedFd();
+    return File();
   }
   return fd;
 }
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
index f7e4bad..4b5d2f5 100644
--- a/runtime/base/file_magic.h
+++ b/runtime/base/file_magic.h
@@ -20,12 +20,12 @@
 #include <stdint.h>
 #include <string>
 
-#include "ScopedFd.h"
+#include "os.h"
 
 namespace art {
 
 // Open file and read magic number
-ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
 
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
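Editor's note: OpenAndReadMagic now returns a File (an FdFile) by value instead of a ScopedFd, relying on the move support added elsewhere in this change; failure is signalled by a default-constructed File whose descriptor is -1. A hedged caller-side sketch against the declaration above; the surrounding error handling is illustrative and not taken from the patch.

// Sketch only; assumes the OpenAndReadMagic/IsZipMagic declarations above and ART's LOG macro.
std::string error_msg;
uint32_t magic = 0;
File file = OpenAndReadMagic("/path/to/classes.dex", &magic, &error_msg);
if (file.Fd() == -1) {
  // Could not open or read the header; error_msg explains why.
  LOG(ERROR) << "Failed to read magic: " << error_msg;
  return false;
}
if (IsZipMagic(magic)) {
  // Treat the file as a zip/APK container; otherwise fall through to raw dex handling.
}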
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index c7a0ba2..4af47d1 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -202,9 +202,13 @@
 
 template <class Value>
 inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const {
-  os << Name()
-     << ": Avg: " << PrettySize(Mean()) << " Max: "
-     << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  os << Name();
+  if (sample_size_ != 0u) {
+    os << ": Avg: " << PrettySize(Mean()) << " Max: "
+       << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  } else {
+    os << ": <no data>\n";
+  }
 }
 
 template <class Value>
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 212e5bd..28352cb 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -26,7 +26,7 @@
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/log.h"
 #else
 #include <sys/types.h>
@@ -47,7 +47,7 @@
 // Print INTERNAL_FATAL messages directly instead of at destruction time. This only works on the
 // host right now: for the device, a stream buf collating output into lines and calling LogLine or
 // lower-level logging is necessary.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr bool kPrintInternalFatalDirectly = false;
 #else
 static constexpr bool kPrintInternalFatalDirectly = !kIsTargetBuild;
@@ -193,7 +193,10 @@
   }
 }
 LogMessage::~LogMessage() {
-  if (!PrintDirectly(data_->GetSeverity()) && data_->GetSeverity() != LogSeverity::NONE) {
+  if (PrintDirectly(data_->GetSeverity())) {
+    // Add newline at the end to match the not printing directly behavior.
+    std::cerr << '\n';
+  } else if (data_->GetSeverity() != LogSeverity::NONE) {
     if (data_->GetSeverity() < gMinimumLogSeverity) {
       return;  // No need to format something we're not going to output.
     }
@@ -235,7 +238,7 @@
   return data_->GetBuffer();
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static const android_LogPriority kLogSeverityToAndroidLogPriority[] = {
   ANDROID_LOG_VERBOSE,  // NONE, use verbose as stand-in, will never be printed.
   ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
@@ -251,7 +254,7 @@
     return;
   }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   const char* tag = ProgramInvocationShortName();
   int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
   if (priority == ANDROID_LOG_FATAL) {
@@ -274,7 +277,7 @@
     return;
   }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // Use android_writeLog() to avoid stack-based buffers used by android_printLog().
   const char* tag = ProgramInvocationShortName();
   int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
@@ -311,7 +314,7 @@
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "] ", 2));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, message, strlen(message)));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "\n", 1));
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 ScopedLogSeverity::ScopedLogSeverity(LogSeverity level) {
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 3b5b8b5..ac21a3f 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -57,6 +57,7 @@
   bool verifier;
   bool image;
   bool systrace_lock_logging;  // Enabled with "-verbose:sys-locks".
+  bool agents;
 };
 
 // Global log verbosity setting, initialized by InitLogging.
@@ -140,11 +141,11 @@
 
 // Helper for CHECK_STRxx(s1,s2) macros.
 #define CHECK_STROP(s1, s2, sense) \
-  if (UNLIKELY((strcmp(s1, s2) == 0) != sense)) \
+  if (UNLIKELY((strcmp(s1, s2) == 0) != (sense))) \
     LOG(::art::FATAL) << "Check failed: " \
-        << "\"" << s1 << "\"" \
-        << (sense ? " == " : " != ") \
-        << "\"" << s2 << "\""
+        << "\"" << (s1) << "\"" \
+        << ((sense) ? " == " : " != ") \
+        << "\"" << (s2) << "\""
 
 // Check for string (const char*) equality between s1 and s2, LOG(FATAL) if not.
 #define CHECK_STREQ(s1, s2) CHECK_STROP(s1, s2, true)
@@ -156,7 +157,7 @@
     int rc = call args; \
     if (rc != 0) { \
       errno = rc; \
-      PLOG(::art::FATAL) << # call << " failed for " << what; \
+      PLOG(::art::FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
@@ -198,14 +199,14 @@
 // types of LHS and RHS.
 template <typename LHS, typename RHS>
 struct EagerEvaluator {
-  EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
+  constexpr EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
   LHS lhs;
   RHS rhs;
 };
 
 // Helper function for CHECK_xx.
 template <typename LHS, typename RHS>
-static inline EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
+static inline constexpr EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
   return EagerEvaluator<LHS, RHS>(lhs, rhs);
 }
 
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 7a293c7..5a50247 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -75,7 +75,7 @@
     ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } \
     ALWAYS_INLINE void operator delete(void*, void*) noexcept { } \
   private: \
-    void* operator new(size_t) = delete
+    void* operator new(size_t) = delete  // NOLINT
 
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
@@ -135,13 +135,13 @@
 #define ARRAYSIZE_UNSAFE(a) \
   ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)
+#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)  // NOLINT
 
 #define OFFSETOF_MEMBER(t, f) \
-  (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT
+  (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u))  // NOLINT
 
 #define OFFSETOF_MEMBERPTR(t, f) \
-  (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT
+  (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16))  // NOLINT
 
 #define PACKED(x) __attribute__ ((__aligned__(x), __packed__))
 
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index bd8de87..1c32024 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -73,6 +73,11 @@
           level == kThreadListLock ||
           // Ignore logging which may or may not have set up thread data structures.
           level == kLoggingLock ||
+          // When transitioning from suspended to runnable, a daemon thread might be in
+          // a situation where the runtime is shutting down. To not crash our debug locking
+          // mechanism we just pass null Thread* to the MutexLock during that transition
+          // (see Thread::TransitionFromSuspendedToRunnable).
+          level == kThreadSuspendCountLock ||
           // Avoid recursive death.
           level == kAbortLock) << level;
   }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index d1713ed..264a530 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -62,7 +62,6 @@
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
-Mutex* Locks::lambda_table_lock_ = nullptr;
 Uninterruptible Roles::uninterruptible_;
 
 struct AllMutexData {
@@ -963,7 +962,6 @@
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
-    DCHECK(lambda_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -971,7 +969,7 @@
     instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-      if (new_level >= current_lock_level) { \
+      if ((new_level) >= current_lock_level) { \
         /* Do not use CHECKs or FATAL here, abort_lock_ is not setup yet. */ \
         fprintf(stderr, "New local level %d is not less than current level %d\n", \
                 new_level, current_lock_level); \
@@ -1074,10 +1072,6 @@
     DCHECK(reference_queue_soft_references_lock_ == nullptr);
     reference_queue_soft_references_lock_ = new Mutex("ReferenceQueue soft references lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kLambdaTableLock);
-    DCHECK(lambda_table_lock_ == nullptr);
-    lambda_table_lock_ = new Mutex("lambda table lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3d7624d..d0dc886 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,7 +60,6 @@
   kUnexpectedSignalLock,
   kThreadSuspendCountLock,
   kAbortLock,
-  kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kRosAllocGlobalLock,
@@ -88,7 +87,6 @@
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
   kDeoptimizedMethodsLock,
-  kJitCodeCacheLock,
   kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
@@ -99,6 +97,7 @@
   kMonitorPoolLock,
   kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
+  kJitCodeCacheLock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
@@ -690,10 +689,6 @@
 
   // Have an exclusive logging thread.
   static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
-
-  // Allow reader-writer mutual exclusion on the boxed table of lambda objects.
-  // TODO: this should be a RW mutex lock, except that ConditionVariables don't work with it.
-  static Mutex* lambda_table_lock_ ACQUIRED_AFTER(mutator_lock_);
 };
 
 class Roles {
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index e4097dd..48e3ceb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -53,7 +53,15 @@
       fd_(fd), file_path_(path), auto_close_(true), read_only_mode_(read_only_mode) {
 }
 
-FdFile::~FdFile() {
+FdFile::FdFile(const std::string& path, int flags, mode_t mode, bool check_usage)
+    : fd_(-1), auto_close_(true) {
+  Open(path, flags, mode);
+  if (!check_usage || !IsOpened()) {
+    guard_state_ = GuardState::kNoCheck;
+  }
+}
+
+void FdFile::Destroy() {
   if (kCheckSafeUsage && (guard_state_ < GuardState::kNoCheck)) {
     if (guard_state_ < GuardState::kFlushed) {
       LOG(::art::ERROR) << "File " << file_path_ << " wasn't explicitly flushed before destruction.";
@@ -70,6 +78,28 @@
   }
 }
 
+FdFile& FdFile::operator=(FdFile&& other) {
+  if (this == &other) {
+    return *this;
+  }
+
+  if (this->fd_ != other.fd_) {
+    Destroy();  // Free old state.
+  }
+
+  guard_state_ = other.guard_state_;
+  fd_ = other.fd_;
+  file_path_ = std::move(other.file_path_);
+  auto_close_ = other.auto_close_;
+  other.Release();  // Release other.
+
+  return *this;
+}
+
+FdFile::~FdFile() {
+  Destroy();
+}
+
 void FdFile::moveTo(GuardState target, GuardState warn_threshold, const char* warning) {
   if (kCheckSafeUsage) {
     if (guard_state_ < GuardState::kNoCheck) {
@@ -102,14 +132,14 @@
 }
 
 bool FdFile::Open(const std::string& path, int flags, mode_t mode) {
+  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   CHECK_EQ(fd_, -1) << path;
-  read_only_mode_ = (flags & O_RDONLY) != 0;
+  read_only_mode_ = ((flags & O_ACCMODE) == O_RDONLY);
   fd_ = TEMP_FAILURE_RETRY(open(path.c_str(), flags, mode));
   if (fd_ == -1) {
     return false;
   }
   file_path_ = path;
-  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
   if (kCheckSafeUsage && (flags & (O_RDWR | O_CREAT | O_WRONLY)) != 0) {
     // Start in the base state (not flushed, not closed).
     guard_state_ = GuardState::kBase;
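Editor's note: the read_only_mode_ fix in FdFile::Open matters because O_RDONLY is 0 (the patch static_asserts exactly that), so the old test (flags & O_RDONLY) != 0 could never be true; masking with O_ACCMODE and comparing against O_RDONLY is the correct check. A tiny standalone verification of that reasoning, assuming a POSIX <fcntl.h>.

#include <fcntl.h>
#include <cassert>

int main() {
  int flags = O_RDONLY;                       // O_RDONLY is 0 on POSIX systems.
  assert((flags & O_RDONLY) == 0);            // The old test could never fire.
  assert((flags & O_ACCMODE) == O_RDONLY);    // The new test identifies read-only opens.
  assert((O_RDWR & O_ACCMODE) != O_RDONLY);   // And correctly rejects read-write opens.
  return 0;
}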
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 16cd44f..d896ee9 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -18,7 +18,9 @@
 #define ART_RUNTIME_BASE_UNIX_FILE_FD_FILE_H_
 
 #include <fcntl.h>
+
 #include <string>
+
 #include "base/unix_file/random_access_file.h"
 #include "base/macros.h"
 
@@ -39,6 +41,46 @@
   FdFile(int fd, const std::string& path, bool checkUsage);
   FdFile(int fd, const std::string& path, bool checkUsage, bool read_only_mode);
 
+  FdFile(const std::string& path, int flags, bool checkUsage)
+      : FdFile(path, flags, 0640, checkUsage) {}
+  FdFile(const std::string& path, int flags, mode_t mode, bool checkUsage);
+
+  // Move constructor.
+  FdFile(FdFile&& other)
+      : guard_state_(other.guard_state_),
+        fd_(other.fd_),
+        file_path_(std::move(other.file_path_)),
+        auto_close_(other.auto_close_),
+        read_only_mode_(other.read_only_mode_) {
+    other.Release();  // Release the src.
+  }
+
+  // Move assignment operator.
+  FdFile& operator=(FdFile&& other);
+
+  // Release the file descriptor. This will make further accesses to this FdFile invalid. Disables
+  // all further state checking.
+  int Release() {
+    int tmp_fd = fd_;
+    fd_ = -1;
+    guard_state_ = GuardState::kNoCheck;
+    auto_close_ = false;
+    return tmp_fd;
+  }
+
+  void Reset(int fd, bool check_usage) {
+    if (fd_ != -1 && fd_ != fd) {
+      Destroy();
+    }
+    fd_ = fd;
+    if (check_usage) {
+      guard_state_ = fd == -1 ? GuardState::kNoCheck : GuardState::kBase;
+    } else {
+      guard_state_ = GuardState::kNoCheck;
+    }
+    // Keep the auto_close_ state.
+  }
+
   // Destroys an FdFile, closing the file descriptor if Close hasn't already
   // been called. (If you care about the return value of Close, call it
   // yourself; this is meant to handle failure cases and read-only accesses.
@@ -46,10 +88,6 @@
   // guarantee that data actually made it to stable storage.)
   virtual ~FdFile();
 
-  // Opens file 'file_path' using 'flags' and 'mode'.
-  bool Open(const std::string& file_path, int flags);
-  bool Open(const std::string& file_path, int flags, mode_t mode);
-
   // RandomAccessFile API.
   int Close() OVERRIDE WARN_UNUSED;
   int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
@@ -119,10 +157,16 @@
 
   GuardState guard_state_;
 
+  // Opens file 'file_path' using 'flags' and 'mode'.
+  bool Open(const std::string& file_path, int flags);
+  bool Open(const std::string& file_path, int flags, mode_t mode);
+
  private:
   template <bool kUseOffset>
   bool WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset);
 
+  void Destroy();  // For ~FdFile and operator=(&&).
+
   int fd_;
   std::string file_path_;
   bool auto_close_;
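Editor's note: the FdFile move constructor and move assignment follow the usual "steal the descriptor, neuter the source" pattern: the source is Release()d so its destructor cannot close the fd twice, and move assignment Destroy()s the old state first. A generic sketch of the same ownership pattern with a plain descriptor wrapper; the names are illustrative, not ART's.

#include <unistd.h>
#include <utility>

// Minimal move-only fd owner illustrating the Release()/Destroy() split used by FdFile.
class UniqueFd {
 public:
  explicit UniqueFd(int fd = -1) : fd_(fd) {}
  UniqueFd(UniqueFd&& other) : fd_(other.Release()) {}
  UniqueFd& operator=(UniqueFd&& other) {
    if (this != &other) {
      Destroy();              // Close whatever we currently own.
      fd_ = other.Release();  // Steal the descriptor; the source becomes -1.
    }
    return *this;
  }
  ~UniqueFd() { Destroy(); }

  int Release() { int tmp = fd_; fd_ = -1; return tmp; }
  int Get() const { return fd_; }

 private:
  void Destroy() {
    if (fd_ != -1) {
      close(fd_);
      fd_ = -1;
    }
  }
  int fd_;
};

int main() {
  UniqueFd a(dup(0));        // Own a duplicate of stdin for demonstration.
  UniqueFd b(std::move(a));  // a.Get() is now -1; b owns the descriptor.
  return b.Get() == -1 ? 1 : 0;
}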
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 9bc87e5..99ef6f7 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -49,29 +49,31 @@
 
 TEST_F(FdFileTest, OpenClose) {
   std::string good_path(GetTmpPath("some-file.txt"));
-  FdFile file;
-  ASSERT_TRUE(file.Open(good_path, O_CREAT | O_WRONLY));
+  FdFile file(good_path, O_CREAT | O_WRONLY, true);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
   EXPECT_EQ(0, file.Flush());
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
-  EXPECT_TRUE(file.Open(good_path,  O_RDONLY));
-  EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
+  FdFile file2(good_path, O_RDONLY, true);
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_TRUE(file2.ReadOnlyMode());
+  EXPECT_GE(file2.Fd(), 0);
 
-  ASSERT_EQ(file.Close(), 0);
+  ASSERT_EQ(file2.Close(), 0);
   ASSERT_EQ(unlink(good_path.c_str()), 0);
 }
 
 TEST_F(FdFileTest, ReadFullyEmptyFile) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDONLY));
+  FdFile file(tmp.GetFilename(), O_RDONLY, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_TRUE(file.ReadOnlyMode());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
   uint8_t buffer[16];
   EXPECT_FALSE(file.ReadFully(&buffer, 4));
 }
@@ -84,10 +86,10 @@
 TEST_F(FdFileTest, ReadFullyWithOffset) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
 
   char ignore_prefix[20] = {'a', };
   NullTerminateCharArray(ignore_prefix);
@@ -113,10 +115,10 @@
 TEST_F(FdFileTest, ReadWriteFullyWithOffset) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
-  EXPECT_GE(file.Fd(), 0);
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_FALSE(file.ReadOnlyMode());
 
   const char* test_string = "This is a test string";
   size_t length = strlen(test_string) + 1;
@@ -140,8 +142,7 @@
 
 TEST_F(FdFileTest, Copy) {
   art::ScratchFile src_tmp;
-  FdFile src;
-  ASSERT_TRUE(src.Open(src_tmp.GetFilename(), O_RDWR));
+  FdFile src(src_tmp.GetFilename(), O_RDWR, false);
   ASSERT_GE(src.Fd(), 0);
   ASSERT_TRUE(src.IsOpened());
 
@@ -151,8 +152,7 @@
   ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
 
   art::ScratchFile dest_tmp;
-  FdFile dest;
-  ASSERT_TRUE(dest.Open(src_tmp.GetFilename(), O_RDWR));
+  FdFile dest(src_tmp.GetFilename(), O_RDWR, false);
   ASSERT_GE(dest.Fd(), 0);
   ASSERT_TRUE(dest.IsOpened());
 
@@ -168,4 +168,22 @@
   ASSERT_EQ(0, src.Close());
 }
 
+TEST_F(FdFileTest, MoveConstructor) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+
+  int old_fd = file.Fd();
+
+  FdFile file2(std::move(file));
+  EXPECT_FALSE(file.IsOpened());
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_EQ(old_fd, file2.Fd());
+
+  ASSERT_EQ(file2.Flush(), 0);
+  ASSERT_EQ(file2.Close(), 0);
+}
+
 }  // namespace unix_file
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index beabce3..96fa53c 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1176,14 +1176,16 @@
       return false;
     }
 
-    // Get the *correct* JNIEnv by going through our TLS pointer.
+    // Get the current thread's JNIEnv by going through our TLS pointer.
     JNIEnvExt* threadEnv = self->GetJniEnv();
 
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
     if (env != threadEnv) {
+      // Get the thread owning the JNIEnv that's being used.
+      Thread* envThread = reinterpret_cast<JNIEnvExt*>(env)->self;
       AbortF("thread %s using JNIEnv* from thread %s",
-             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*self).c_str());
+             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*envThread).c_str());
       return false;
     }
 
@@ -2427,19 +2429,20 @@
                                                      Primitive::kPrimDouble));
   }
 
+// NOLINT added to avoid wrong warning/fix from clang-tidy.
 #define PRIMITIVE_ARRAY_FUNCTIONS(ctype, name, ptype) \
-  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { \
-    return reinterpret_cast<ctype*>( \
+  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { /* NOLINT */ \
+    return reinterpret_cast<ctype*>( /* NOLINT */ \
         GetPrimitiveArrayElements(__FUNCTION__, ptype, env, array, is_copy)); \
   } \
   \
-  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, \
+  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, /* NOLINT */ \
                                            jint mode) { \
     ReleasePrimitiveArrayElements(__FUNCTION__, ptype, env, array, elems, mode); \
   } \
   \
   static void Get##name##ArrayRegion(JNIEnv* env, ctype##Array array, jsize start, jsize len, \
-                                     ctype* buf) { \
+                                     ctype* buf) { /* NOLINT */ \
     GetPrimitiveArrayRegion(__FUNCTION__, ptype, env, array, start, len, buf); \
   } \
   \
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index f3e260b..97aa499 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -27,6 +27,8 @@
 #include "mirror/object_array.h"
 #include "handle_scope-inl.h"
 
+#include <atomic>
+
 namespace art {
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
@@ -63,18 +65,21 @@
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  mirror::String* resolved_string = declaring_class->GetDexCacheStrings()[string_idx].Read();
-  if (UNLIKELY(resolved_string == nullptr)) {
+  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());
+  mirror::String* string =
+        mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(),
+                                                 string_idx,
+                                                 mirror::DexCache::kDexCacheStringCacheSize).Read();
+  if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_string = ResolveString(dex_file, string_idx, dex_cache);
-    if (resolved_string != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), resolved_string);
+    string = ResolveString(dex_file, string_idx, dex_cache);
+    if (string != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), string);
     }
   }
-  return resolved_string;
+  return string;
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtMethod* referrer) {
@@ -225,6 +230,34 @@
   return klass;
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
+  DCHECK(proxy_class->IsProxyClass());
+  DCHECK(proxy_method->IsProxyMethod<kReadBarrierOption>());
+  {
+    Thread* const self = Thread::Current();
+    ReaderMutexLock mu(self, dex_lock_);
+    // Locate the dex cache of the original interface/Object
+    for (const DexCacheData& data : dex_caches_) {
+      if (!self->IsJWeakCleared(data.weak_root) &&
+          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
+                                                     image_pointer_size_)) {
+        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+            self->DecodeJObject(data.weak_root));
+        if (dex_cache != nullptr) {
+          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
+              proxy_method->GetDexMethodIndex(), image_pointer_size_);
+          CHECK(resolved_method != nullptr);
+          return resolved_method;
+        }
+      }
+    }
+  }
+  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
+      << PrettyMethod(proxy_method);
+  UNREACHABLE();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_LINKER_INL_H_
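Editor's note: ResolveString now goes through StringDexCachePair::LookupString, reflecting the switch from one-slot-per-string-id arrays to a fixed-size cache indexed by string_idx modulo the cache size, where each slot remembers which index it currently holds. A simplified standalone sketch of that lookup scheme; it uses plain pointers instead of GC roots and atomics, and the size and names are illustrative only.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Simplified fixed-size dex-cache slot: remembers which string index it caches.
struct StringCachePair {
  const char* string_pointer = nullptr;
  uint32_t string_index = 0;
};

constexpr size_t kStringCacheSize = 1024;  // Power of two so the masking below works.

const char* LookupString(StringCachePair* cache, uint32_t string_idx) {
  StringCachePair& slot = cache[string_idx & (kStringCacheSize - 1)];
  // Only a hit if the slot currently caches this exact index; otherwise the
  // caller falls back to slow-path resolution and overwrites the slot.
  return (slot.string_index == string_idx) ? slot.string_pointer : nullptr;
}

int main() {
  static StringCachePair cache[kStringCacheSize];
  cache[5].string_pointer = "hello";
  cache[5].string_index = 5;
  std::printf("%s\n", LookupString(cache, 5) != nullptr ? "hit" : "miss");                    // hit
  std::printf("%s\n", LookupString(cache, 5 + kStringCacheSize) != nullptr ? "hit" : "miss"); // miss
  return 0;
}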
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 1914733..f4400c3 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -66,6 +66,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/field.h"
 #include "mirror/iftable-inl.h"
@@ -323,7 +324,7 @@
       quick_imt_conflict_trampoline_(nullptr),
       quick_generic_jni_trampoline_(nullptr),
       quick_to_interpreter_bridge_trampoline_(nullptr),
-      image_pointer_size_(sizeof(void*)) {
+      image_pointer_size_(kRuntimePointerSize) {
   CHECK(intern_table_ != nullptr);
   static_assert(kFindArrayCacheSize == arraysize(find_array_class_cache_),
                 "Array cache size wrong.");
@@ -361,10 +362,6 @@
 
   // Use the pointer size from the runtime since we are probably creating the image.
   image_pointer_size_ = InstructionSetPointerSize(runtime->GetInstructionSet());
-  if (!ValidPointerSize(image_pointer_size_)) {
-    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
-    return false;
-  }
 
   // java_lang_Class comes first, it's needed for AllocClass
   // The GC can't handle an object with a null class since we can't get the size of this object.
@@ -791,7 +788,7 @@
 
 static void SanityCheckArtMethodPointerArray(mirror::PointerArray* arr,
                                              mirror::Class* expected_class,
-                                             size_t pointer_size,
+                                             PointerSize pointer_size,
                                              const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(arr != nullptr);
@@ -809,7 +806,7 @@
 
 static void SanityCheckArtMethodPointerArray(ArtMethod** arr,
                                              size_t size,
-                                             size_t pointer_size,
+                                             PointerSize pointer_size,
                                              const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK_EQ(arr != nullptr, size != 0u);
@@ -883,7 +880,7 @@
 // Set image methods' entry point to interpreter.
 class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor {
  public:
-  explicit SetInterpreterEntrypointArtMethodVisitor(size_t image_pointer_size)
+  explicit SetInterpreterEntrypointArtMethodVisitor(PointerSize image_pointer_size)
     : image_pointer_size_(image_pointer_size) {}
 
   void Visit(ArtMethod* method) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -897,7 +894,7 @@
   }
 
  private:
-  const size_t image_pointer_size_;
+  const PointerSize image_pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor);
 };
@@ -907,7 +904,7 @@
   const void* quick_imt_conflict_trampoline;
   const void* quick_generic_jni_trampoline;
   const void* quick_to_interpreter_bridge_trampoline;
-  size_t pointer_size;
+  PointerSize pointer_size;
   ArtMethod* m;
   bool error;
 };
@@ -939,18 +936,19 @@
   gc::Heap* const heap = runtime->GetHeap();
   std::vector<gc::space::ImageSpace*> spaces = heap->GetBootImageSpaces();
   CHECK(!spaces.empty());
-  image_pointer_size_ = spaces[0]->GetImageHeader().GetPointerSize();
-  if (!ValidPointerSize(image_pointer_size_)) {
-    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
+  uint32_t pointer_size_unchecked = spaces[0]->GetImageHeader().GetPointerSizeUnchecked();
+  if (!ValidPointerSize(pointer_size_unchecked)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %u", pointer_size_unchecked);
     return false;
   }
+  image_pointer_size_ = spaces[0]->GetImageHeader().GetPointerSize();
   if (!runtime->IsAotCompiler()) {
     // Only the Aot compiler supports having an image with a different pointer size than the
     // runtime. This happens on the host for compiling 32 bit tests since we use a 64 bit libart
     // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
-    if (image_pointer_size_ != sizeof(void*)) {
+    if (image_pointer_size_ != kRuntimePointerSize) {
       *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
-                                image_pointer_size_,
+                                static_cast<size_t>(image_pointer_size_),
                                 sizeof(void*));
       return false;
     }
@@ -1150,7 +1148,7 @@
   explicit FixupArtMethodArrayVisitor(const ImageHeader& header) : header_(header) {}
 
   virtual void Visit(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-    GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(sizeof(void*));
+    GcRoot<mirror::Class>* resolved_types = method->GetDexCacheResolvedTypes(kRuntimePointerSize);
     const bool is_copied = method->IsCopied();
     if (resolved_types != nullptr) {
       bool in_image_space = false;
@@ -1165,10 +1163,10 @@
       if (!is_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
-                                         sizeof(void*));
+                                         kRuntimePointerSize);
       }
     }
-    ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(sizeof(void*));
+    ArtMethod** resolved_methods = method->GetDexCacheResolvedMethods(kRuntimePointerSize);
     if (resolved_methods != nullptr) {
       bool in_image_space = false;
       if (kIsDebugBuild || is_copied) {
@@ -1182,7 +1180,7 @@
       if (!is_copied || in_image_space) {
         // Go through the array so that we don't need to do a slow map lookup.
         method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
-                                           sizeof(void*));
+                                           kRuntimePointerSize);
       }
     }
   }
@@ -1274,7 +1272,10 @@
       // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
         // copy over the arrays.
         DCHECK(dex_file != nullptr);
-        const size_t num_strings = dex_file->NumStringIds();
+        size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+        if (dex_file->NumStringIds() < num_strings) {
+          num_strings = dex_file->NumStringIds();
+        }
         const size_t num_types = dex_file->NumTypeIds();
         const size_t num_methods = dex_file->NumMethodIds();
         const size_t num_fields = dex_file->NumFieldIds();
@@ -1284,16 +1285,17 @@
         CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
         DexCacheArraysLayout layout(image_pointer_size_, dex_file);
         uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
-        // The space is not yet visible to the GC, we can avoid the read barriers and use
-        // std::copy_n.
         if (num_strings != 0u) {
-          GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
-          GcRoot<mirror::String>* const strings =
-              reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
-          for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
-            DCHECK(strings[j].IsNull());
+          mirror::StringDexCacheType* const image_resolved_strings = dex_cache->GetStrings();
+          mirror::StringDexCacheType* const strings =
+              reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
+          for (size_t j = 0; j < num_strings; ++j) {
+            DCHECK_EQ(strings[j].load(std::memory_order_relaxed).string_index, 0u);
+            DCHECK(strings[j].load(std::memory_order_relaxed).string_pointer.IsNull());
+            strings[j].store(image_resolved_strings[j].load(std::memory_order_relaxed),
+                             std::memory_order_relaxed);
           }
-          std::copy_n(image_resolved_strings, num_strings, strings);
+          mirror::StringDexCachePair::Initialize(strings);
           dex_cache->SetStrings(strings);
         }
         if (num_types != 0u) {
@@ -1382,11 +1384,11 @@
                 VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
               }
               VLOG(image) << "Direct methods";
-              for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
                 VLOG(image) << PrettyMethod(&m);
               }
               VLOG(image) << "Virtual methods";
-              for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
                 VLOG(image) << PrettyMethod(&m);
               }
             }
@@ -1422,7 +1424,7 @@
               }
             }
             if (kIsDebugBuild) {
-              for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
                 const void* code = m.GetEntryPointFromQuickCompiledCode();
                 const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
                 if (!IsQuickResolutionStub(code) &&
@@ -1432,7 +1434,7 @@
                   DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
                 }
               }
-              for (ArtMethod& m : klass->GetVirtualMethods(sizeof(void*))) {
+              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
                 const void* code = m.GetEntryPointFromQuickCompiledCode();
                 const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
                 if (!IsQuickResolutionStub(code) &&
@@ -1451,14 +1453,14 @@
   if (*out_forward_dex_cache_array) {
     ScopedTrace timing("Fixup ArtMethod dex cache arrays");
     FixupArtMethodArrayVisitor visitor(header);
-    header.VisitPackedArtMethods(&visitor, space->Begin(), sizeof(void*));
+    header.VisitPackedArtMethods(&visitor, space->Begin(), kRuntimePointerSize);
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
   }
   if (kVerifyArtMethodDeclaringClasses) {
     ScopedTrace timing("Verify declaring classes");
     ReaderMutexLock rmu(self, *Locks::heap_bitmap_lock_);
     VerifyDeclaringClassVisitor visitor;
-    header.VisitPackedArtMethods(&visitor, space->Begin(), sizeof(void*));
+    header.VisitPackedArtMethods(&visitor, space->Begin(), kRuntimePointerSize);
   }
   return true;
 }
@@ -1476,14 +1478,14 @@
 
   bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) {
     if (forward_strings_) {
-      GcRoot<mirror::String>* strings = klass->GetDexCacheStrings();
+      mirror::StringDexCacheType* strings = klass->GetDexCacheStrings();
       if (strings != nullptr) {
         DCHECK(
             space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
                 reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
             << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
         // Dex caches have already been updated, so take the strings pointer from there.
-        GcRoot<mirror::String>* new_strings = klass->GetDexCache()->GetStrings();
+        mirror::StringDexCacheType* new_strings = klass->GetDexCache()->GetStrings();
         DCHECK_NE(strings, new_strings);
         klass->SetDexCacheStrings(new_strings);
       }
@@ -1504,11 +1506,8 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(error_msg != nullptr);
   std::unique_ptr<const DexFile> dex_file;
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(location, nullptr);
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(location, nullptr, error_msg);
   if (oat_dex_file == nullptr) {
-    *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
-                              oat_file->GetLocation().c_str(),
-                              location);
     return std::unique_ptr<const DexFile>();
   }
   std::string inner_error_msg;
@@ -1810,7 +1809,7 @@
     // This verification needs to happen after the classes have been added to the class loader,
     // since it ensures classes are in the class table.
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
-    header.VisitPackedArtMethods(&visitor2, space->Begin(), sizeof(void*));
+    header.VisitPackedArtMethods(&visitor2, space->Begin(), kRuntimePointerSize);
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -2054,9 +2053,10 @@
 }
 
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
-  return down_cast<mirror::PointerArray*>(image_pointer_size_ == 8u ?
-      static_cast<mirror::Array*>(mirror::LongArray::Alloc(self, length)) :
-      static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
+  return down_cast<mirror::PointerArray*>(
+      image_pointer_size_ == PointerSize::k64
+          ? static_cast<mirror::Array*>(mirror::LongArray::Alloc(self, length))
+          : static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
 }
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self,
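
AllocPointerArray above backs a pointer array with 64-bit or 32-bit elements depending on the image pointer size. A small stand-alone illustration of that selection, with std::vector and std::variant standing in for the managed LongArray/IntArray types:

    #include <cstddef>
    #include <cstdint>
    #include <variant>
    #include <vector>

    // Sketch: choose the element width of the backing storage from the
    // configured pointer size, mirroring the LongArray/IntArray split above.
    using PointerArrayStorage =
        std::variant<std::vector<uint32_t>, std::vector<uint64_t>>;

    PointerArrayStorage AllocPointerArray(bool is_64bit, size_t length) {
      if (is_64bit) {
        return std::vector<uint64_t>(length);
      }
      return std::vector<uint32_t>(length);
    }
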
@@ -2081,23 +2081,34 @@
     raw_arrays = dex_file.GetOatDexFile()->GetDexCacheArrays();
   } else if (dex_file.NumStringIds() != 0u || dex_file.NumTypeIds() != 0u ||
       dex_file.NumMethodIds() != 0u || dex_file.NumFieldIds() != 0u) {
-    // NOTE: We "leak" the raw_arrays because we never destroy the dex cache.
-    DCHECK(image_pointer_size_ == 4u || image_pointer_size_ == 8u);
     // Zero-initialized.
     raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
   }
-  GcRoot<mirror::String>* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
-      reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
+  mirror::StringDexCacheType* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
+      reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
   GcRoot<mirror::Class>* types = (dex_file.NumTypeIds() == 0u) ? nullptr :
       reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
   ArtMethod** methods = (dex_file.NumMethodIds() == 0u) ? nullptr :
       reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
   ArtField** fields = (dex_file.NumFieldIds() == 0u) ? nullptr :
       reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+  if (dex_file.NumStringIds() < num_strings) {
+    num_strings = dex_file.NumStringIds();
+  }
+  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
+                 "Expected raw_arrays to align to StringDexCacheType.";
+  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
+                 "Expected StringsOffset() to align to StringDexCacheType.";
+  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
+                 "Expected strings to align to StringDexCacheType.";
+  static_assert(alignof(mirror::StringDexCacheType) == 8u,
+                "Expected StringDexCacheType to have align of 8.");
   if (kIsDebugBuild) {
     // Sanity check to make sure all the dex cache arrays are empty. b/28992179
-    for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
-      CHECK(strings[i].Read<kWithoutReadBarrier>() == nullptr);
+    for (size_t i = 0; i < num_strings; ++i) {
+      CHECK_EQ(strings[i].load(std::memory_order_relaxed).string_index, 0u);
+      CHECK(strings[i].load(std::memory_order_relaxed).string_pointer.IsNull());
     }
     for (size_t i = 0; i < dex_file.NumTypeIds(); ++i) {
       CHECK(types[i].Read<kWithoutReadBarrier>() == nullptr);
@@ -2109,10 +2120,13 @@
       CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size_) == nullptr);
     }
   }
+  if (strings != nullptr) {
+    mirror::StringDexCachePair::Initialize(strings);
+  }
   dex_cache->Init(&dex_file,
                   location.Get(),
                   strings,
-                  dex_file.NumStringIds(),
+                  num_strings,
                   types,
                   dex_file.NumTypeIds(),
                   methods,
@@ -2179,20 +2193,37 @@
   }
 
   // Wait for the class if it has not already been linked.
-  if (!klass->IsResolved() && !klass->IsErroneous()) {
+  size_t index = 0;
+  // Maximum number of yield iterations until we start sleeping.
+  static const size_t kNumYieldIterations = 1000;
+  // How long each sleep is in us.
+  static const size_t kSleepDurationUS = 1000;  // 1 ms.
+  while (!klass->IsResolved() && !klass->IsErroneous()) {
     StackHandleScope<1> hs(self);
     HandleWrapper<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
-    ObjectLock<mirror::Class> lock(self, h_class);
-    // Check for circular dependencies between classes.
-    if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
-      ThrowClassCircularityError(h_class.Get());
-      mirror::Class::SetStatus(h_class, mirror::Class::kStatusError, self);
-      return nullptr;
+    {
+      ObjectTryLock<mirror::Class> lock(self, h_class);
+      // Cannot use a monitor wait here, since it may block when returning and deadlock if another
+      // thread has locked klass.
+      if (lock.Acquired()) {
+        // Check for circular dependencies between classes; the lock is required for SetStatus.
+        if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
+          ThrowClassCircularityError(h_class.Get());
+          mirror::Class::SetStatus(h_class, mirror::Class::kStatusError, self);
+          return nullptr;
+        }
+      }
     }
-    // Wait for the pending initialization to complete.
-    while (!h_class->IsResolved() && !h_class->IsErroneous()) {
-      lock.WaitIgnoringInterrupts();
+    {
+      // Handle wrapper deals with klass moving.
+      ScopedThreadSuspension sts(self, kSuspended);
+      if (index < kNumYieldIterations) {
+        sched_yield();
+      } else {
+        usleep(kSleepDurationUS);
+      }
     }
+    ++index;
   }
 
   if (klass->IsErroneous()) {
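
The loop above swaps the old monitor wait for a try-lock plus a bounded yield-then-sleep backoff, so a thread returning while another thread still holds the class lock cannot deadlock. A stand-alone sketch of the same polling pattern; the predicate and the constants are placeholders, not ART types:

    #include <atomic>
    #include <chrono>
    #include <cstddef>
    #include <thread>

    // Sketch: yield for the first kNumYieldIterations polls, then fall back to
    // short sleeps, mirroring the backoff in ClassLinker::EnsureResolved above.
    void WaitUntilResolved(const std::atomic<bool>& resolved) {
      constexpr size_t kNumYieldIterations = 1000;
      constexpr auto kSleepDuration = std::chrono::microseconds(1000);  // 1 ms.
      for (size_t index = 0; !resolved.load(std::memory_order_acquire); ++index) {
        if (index < kNumYieldIterations) {
          std::this_thread::yield();
        } else {
          std::this_thread::sleep_for(kSleepDuration);
        }
      }
    }
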
@@ -2538,6 +2569,10 @@
   CHECK(h_new_class.Get() != nullptr) << descriptor;
   CHECK(h_new_class->IsResolved()) << descriptor;
 
+  // Update the dex cache of the dex file where the class is defined. Inlining depends on
+  // this being filled in.
+  h_new_class->GetDexCache()->SetResolvedType(h_new_class->GetDexTypeIndex(), h_new_class.Get());
+
   // Instrumentation may have updated entrypoints for all methods of all
   // classes. However it could not update methods of this class while we
   // were loading it. Now the class is resolved, we can update entrypoints
@@ -3546,32 +3581,40 @@
     }
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-  mirror::Class* existing = class_table->Lookup(descriptor, hash);
-  if (existing != nullptr) {
-    return existing;
-  }
-  if (kIsDebugBuild &&
-      !klass->IsTemp() &&
-      class_loader == nullptr &&
-      dex_cache_boot_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromBootImage(descriptor);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+    mirror::Class* existing = class_table->Lookup(descriptor, hash);
     if (existing != nullptr) {
-      CHECK_EQ(klass, existing);
+      return existing;
+    }
+    if (kIsDebugBuild &&
+        !klass->IsTemp() &&
+        class_loader == nullptr &&
+        dex_cache_boot_image_class_lookup_required_) {
+      // Check that a class loaded with the system class loader matches one in the image if the
+      // class is in the image.
+      existing = LookupClassFromBootImage(descriptor);
+      if (existing != nullptr) {
+        CHECK_EQ(klass, existing);
+      }
+    }
+    VerifyObject(klass);
+    class_table->InsertWithHash(klass, hash);
+    if (class_loader != nullptr) {
+      // This is necessary because we need to have the card dirtied for remembered sets.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    }
+    if (log_new_class_table_roots_) {
+      new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
     }
   }
-  VerifyObject(klass);
-  class_table->InsertWithHash(klass, hash);
-  if (class_loader != nullptr) {
-    // This is necessary because we need to have the card dirtied for remembered sets.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-  }
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  if (kIsDebugBuild) {
+    // Test that copied methods can correctly find their holder.
+    for (ArtMethod& method : klass->GetCopiedMethods(image_pointer_size_)) {
+      CHECK_EQ(GetHoldingClassOfCopiedMethod(&method), klass);
+    }
   }
   return nullptr;
 }
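
The restructured InsertClass above wraps the table update in its own block so the class-linker lock is released before the debug-only copied-method check runs. A generic illustration of that scope-narrowing pattern with standard primitives; the names here are placeholders rather than ART's lock types:

    #include <mutex>
    #include <unordered_set>

    std::mutex g_table_lock;
    std::unordered_set<int> g_table;

    bool InsertAndVerify(int value) {
      {
        // Hold the lock only while touching the shared table.
        std::lock_guard<std::mutex> guard(g_table_lock);
        if (!g_table.insert(value).second) {
          return false;  // Already present.
        }
      }
      // Expensive, debug-only verification runs after the lock is released, so
      // it cannot contend with or deadlock against other lock holders.
      return true;
    }
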
@@ -4279,33 +4322,6 @@
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
-ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
-  DCHECK(proxy_class->IsProxyClass());
-  DCHECK(proxy_method->IsProxyMethod());
-  {
-    Thread* const self = Thread::Current();
-    ReaderMutexLock mu(self, dex_lock_);
-    // Locate the dex cache of the original interface/Object
-    for (const DexCacheData& data : dex_caches_) {
-      if (!self->IsJWeakCleared(data.weak_root) &&
-          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
-                                                     image_pointer_size_)) {
-        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
-            self->DecodeJObject(data.weak_root));
-        if (dex_cache != nullptr) {
-          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
-              proxy_method->GetDexMethodIndex(), image_pointer_size_);
-          CHECK(resolved_method != nullptr);
-          return resolved_method;
-        }
-      }
-    }
-  }
-  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
-      << PrettyMethod(proxy_method);
-  UNREACHABLE();
-}
-
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
   CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 18u);
@@ -4842,7 +4858,7 @@
 }
 
 static bool HasSameSignatureWithDifferentClassLoaders(Thread* self,
-                                                      size_t pointer_size,
+                                                      PointerSize pointer_size,
                                                       Handle<mirror::Class> klass,
                                                       Handle<mirror::Class> super_klass,
                                                       ArtMethod* method1,
@@ -4982,6 +4998,7 @@
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
     EnsureSkipAccessChecksMethods(c);
+    self->AssertNoPendingException();
     return true;
   }
   const bool success = InitializeClass(self, c, can_init_fields, can_init_parents);
@@ -5057,7 +5074,7 @@
   return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
 }
 
-static ImTable* FindSuperImt(mirror::Class* klass, size_t pointer_size)
+static ImTable* FindSuperImt(mirror::Class* klass, PointerSize pointer_size)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   while (klass->HasSuperClass()) {
     klass = klass->GetSuperClass();
@@ -5595,7 +5612,7 @@
   LinkVirtualHashTable(Handle<mirror::Class> klass,
                        size_t hash_size,
                        uint32_t* hash_table,
-                       size_t image_pointer_size)
+                       PointerSize image_pointer_size)
      : klass_(klass),
        hash_size_(hash_size),
        hash_table_(hash_table),
@@ -5657,13 +5674,20 @@
   Handle<mirror::Class> klass_;
   const size_t hash_size_;
   uint32_t* const hash_table_;
-  const size_t image_pointer_size_;
+  const PointerSize image_pointer_size_;
 };
 
 const uint32_t LinkVirtualHashTable::invalid_index_ = std::numeric_limits<uint32_t>::max();
 const uint32_t LinkVirtualHashTable::removed_index_ = std::numeric_limits<uint32_t>::max() - 1;
 
-bool ClassLinker::LinkVirtualMethods(
+// b/30419309
+#if defined(__i386__)
+#define X86_OPTNONE __attribute__((optnone))
+#else
+#define X86_OPTNONE
+#endif
+
+X86_OPTNONE bool ClassLinker::LinkVirtualMethods(
     Thread* self,
     Handle<mirror::Class> klass,
     /*out*/std::unordered_map<size_t, ClassLinker::MethodTranslation>* default_translations) {
@@ -5911,7 +5935,7 @@
                                        Handle<mirror::IfTable> iftable,
                                        size_t ifstart,
                                        Handle<mirror::Class> iface,
-                                       size_t image_pointer_size)
+                                       PointerSize image_pointer_size)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(self != nullptr);
   DCHECK(iface.Get() != nullptr);
@@ -6060,7 +6084,7 @@
                                                  ArtMethod* interface_method,
                                                  ArtMethod* method,
                                                  bool force_new_conflict_method) {
-  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(kRuntimePointerSize);
   Runtime* const runtime = Runtime::Current();
   LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
   bool new_entry = conflict_method == runtime->GetImtConflictMethod() || force_new_conflict_method;
@@ -6187,14 +6211,9 @@
   }
 }
 
-static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return interface_method->GetDexMethodIndex() % ImTable::kSize;
-}
-
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
                                                       LinearAlloc* linear_alloc,
-                                                      size_t image_pointer_size) {
+                                                      PointerSize image_pointer_size) {
   void* data = linear_alloc->Alloc(Thread::Current(),
                                    ImtConflictTable::ComputeSize(count,
                                                                  image_pointer_size));
@@ -6243,7 +6262,7 @@
       // or interface methods in the IMT here they will not create extra conflicts since we compare
       // names and signatures in SetIMTRef.
       ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-      const uint32_t imt_index = GetIMTIndex(interface_method);
+      const uint32_t imt_index = interface_method->GetImtIndex();
 
       // There are only conflicts if all of the interface methods for an IMT slot don't have
       // the same implementation method; keep track of this to avoid creating a conflict table in
@@ -6297,7 +6316,7 @@
         }
         DCHECK(implementation_method != nullptr);
         ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-        const uint32_t imt_index = GetIMTIndex(interface_method);
+        const uint32_t imt_index = interface_method->GetImtIndex();
         if (!imt[imt_index]->IsRuntimeMethod() ||
             imt[imt_index] == unimplemented_method ||
             imt[imt_index] == imt_conflict_method) {
@@ -6527,7 +6546,7 @@
   return nullptr;
 }
 
-static void SanityCheckVTable(Handle<mirror::Class> klass, uint32_t pointer_size)
+static void SanityCheckVTable(Handle<mirror::Class> klass, PointerSize pointer_size)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   mirror::PointerArray* check_vtable = klass->GetVTableDuringLinking();
   mirror::Class* superclass = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
@@ -6703,7 +6722,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = GetIMTIndex(interface_method);
+        uint32_t imt_index = interface_method->GetImtIndex();
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -6786,7 +6805,9 @@
             // If the super-class's method is override-able by a default method we need to keep
             // track of it since though it is override-able it is not guaranteed to be 'overridden'.
             // If it turns out not to be overridden and we did not keep track of it we might add it
-            // to the vtable twice, causing corruption in this class and possibly any subclasses.
+            // to the vtable twice, causing corruption (vtable entries having inconsistent and
+            // illegal states, incorrect vtable size, and incorrect or inconsistent iftable entries)
+            // in this class and any subclasses.
             DCHECK(vtable_impl == nullptr || vtable_impl == supers_method)
                 << "vtable_impl was " << PrettyMethod(vtable_impl) << " and not 'nullptr' or "
                 << PrettyMethod(supers_method) << " as expected. IFTable appears to be corrupt!";
@@ -7726,7 +7747,7 @@
   }
 
   if (is_static) {
-    resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
@@ -8119,19 +8140,27 @@
 
 void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
-    const ClassLoaderData& data = *it;
-    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
-    if (class_loader != nullptr) {
-      ++it;
-    } else {
-      VLOG(class_linker) << "Freeing class loader";
-      DeleteClassLoader(self, data);
-      it = class_loaders_.erase(it);
+  std::vector<ClassLoaderData> to_delete;
+  // Do the delete outside the lock to avoid a lock violation in the JIT code cache.
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
+      const ClassLoaderData& data = *it;
+      // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+      auto* const class_loader =
+          down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+      if (class_loader != nullptr) {
+        ++it;
+      } else {
+        VLOG(class_linker) << "Freeing class loader";
+        to_delete.push_back(data);
+        it = class_loaders_.erase(it);
+      }
     }
   }
+  for (ClassLoaderData& data : to_delete) {
+    DeleteClassLoader(self, data);
+  }
 }
 
 std::set<DexCacheResolvedClasses> ClassLinker::GetResolvedClasses(bool ignore_boot_classes) {
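
CleanupClassLoaders now gathers dead entries under the lock and deletes them afterwards, keeping whatever locks the deletion itself takes (for instance in the JIT code cache) outside the classlinker_classes_lock_ critical section. A minimal sketch of the collect-then-delete pattern with generic types, not ART's ClassLoaderData:

    #include <list>
    #include <mutex>
    #include <vector>

    struct Entry { bool dead = false; };

    std::mutex g_registry_lock;
    std::list<Entry*> g_registry;

    void CleanupRegistry() {
      std::vector<Entry*> to_delete;
      {
        std::lock_guard<std::mutex> guard(g_registry_lock);
        for (auto it = g_registry.begin(); it != g_registry.end(); ) {
          if ((*it)->dead) {
            to_delete.push_back(*it);  // Defer the actual delete.
            it = g_registry.erase(it);
          } else {
            ++it;
          }
        }
      }
      // Destructors run outside the registry lock, so any locks they acquire
      // never nest inside it.
      for (Entry* e : to_delete) {
        delete e;
      }
    }
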
@@ -8250,6 +8279,33 @@
   return ret;
 }
 
+class ClassLinker::FindVirtualMethodHolderVisitor : public ClassVisitor {
+ public:
+  FindVirtualMethodHolderVisitor(const ArtMethod* method, PointerSize pointer_size)
+      : method_(method),
+        pointer_size_(pointer_size) {}
+
+  bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE {
+    if (klass->GetVirtualMethodsSliceUnchecked(pointer_size_).Contains(method_)) {
+      holder_ = klass;
+    }
+    // Return false to stop searching if holder_ is not null.
+    return holder_ == nullptr;
+  }
+
+  mirror::Class* holder_ = nullptr;
+  const ArtMethod* const method_;
+  const PointerSize pointer_size_;
+};
+
+mirror::Class* ClassLinker::GetHoldingClassOfCopiedMethod(ArtMethod* method) {
+  ScopedTrace trace(__FUNCTION__);  // Since this function is slow, have a trace to notify people.
+  CHECK(method->IsCopied());
+  FindVirtualMethodHolderVisitor visitor(method, image_pointer_size_);
+  VisitClasses(&visitor);
+  return visitor.holder_;
+}
+
 // Instantiate ResolveMethod.
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
     const DexFile& dex_file,
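
FindVirtualMethodHolderVisitor above ends the class walk by returning false from the visitor once a holder is found. A small generic version of the same early-exit visitor contract; the Klass model and helper names are stand-ins:

    #include <string>
    #include <vector>

    struct Klass { std::vector<std::string> methods; };

    // Visitor contract: return true to keep visiting, false to stop early.
    template <typename Visitor>
    void VisitClasses(const std::vector<Klass>& classes, Visitor&& visitor) {
      for (const Klass& k : classes) {
        if (!visitor(k)) {
          return;
        }
      }
    }

    const Klass* FindHolder(const std::vector<Klass>& classes, const std::string& method) {
      const Klass* holder = nullptr;
      VisitClasses(classes, [&](const Klass& k) {
        for (const std::string& m : k.methods) {
          if (m == method) {
            holder = &k;
          }
        }
        return holder == nullptr;  // Stop as soon as the holder is known.
      });
      return holder;
    }
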
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 4305dc6..c3ab8c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "base/allocator.h"
+#include "base/enums.h"
 #include "base/hash_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -255,7 +256,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identity the
+  // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -483,6 +484,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -565,8 +567,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  size_t GetImagePointerSize() const {
-    DCHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
+  PointerSize GetImagePointerSize() const {
     return image_pointer_size_;
   }
 
@@ -629,7 +630,7 @@
   // Static version for when the class linker is not yet created.
   static ImtConflictTable* CreateImtConflictTable(size_t count,
                                                   LinearAlloc* linear_alloc,
-                                                  size_t pointer_size);
+                                                  PointerSize pointer_size);
 
 
   // Create the IMT and conflict tables for a class.
@@ -647,6 +648,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
+  // Get the actual holding class for a copied method. Pretty slow, don't call often.
+  mirror::Class* GetHoldingClassOfCopiedMethod(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
     // not work properly.
@@ -675,7 +680,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
@@ -1165,8 +1169,9 @@
   const void* quick_to_interpreter_bridge_trampoline_;
 
   // Image pointer size.
-  size_t image_pointer_size_;
+  PointerSize image_pointer_size_;
 
+  class FindVirtualMethodHolderVisitor;
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 48b6316..7999aef 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -21,6 +21,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -147,7 +148,7 @@
     EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
     EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
 
-    size_t pointer_size = class_linker_->GetImagePointerSize();
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
     ArtMethod* unimplemented = runtime_->GetImtUnimplementedMethod();
     ImTable* imt = JavaLangObject->GetImt(pointer_size);
     ASSERT_NE(nullptr, imt);
@@ -216,7 +217,7 @@
     mirror::Class* array_ptr = array->GetComponentType();
     EXPECT_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
 
-    size_t pointer_size = class_linker_->GetImagePointerSize();
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
     mirror::Class* JavaLangObject =
         class_linker_->FindSystemClass(self, "Ljava/lang/Object;");
     ImTable* JavaLangObject_imt = JavaLangObject->GetImt(pointer_size);
@@ -230,14 +231,14 @@
     EXPECT_TRUE(method->GetName() != nullptr);
     EXPECT_TRUE(method->GetSignature() != Signature::NoSignature());
 
-    EXPECT_TRUE(method->HasDexCacheResolvedMethods(sizeof(void*)));
-    EXPECT_TRUE(method->HasDexCacheResolvedTypes(sizeof(void*)));
+    EXPECT_TRUE(method->HasDexCacheResolvedMethods(kRuntimePointerSize));
+    EXPECT_TRUE(method->HasDexCacheResolvedTypes(kRuntimePointerSize));
     EXPECT_TRUE(method->HasSameDexCacheResolvedMethods(
         method->GetDeclaringClass()->GetDexCache()->GetResolvedMethods(),
-        sizeof(void*)));
+        kRuntimePointerSize));
     EXPECT_TRUE(method->HasSameDexCacheResolvedTypes(
         method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes(),
-        sizeof(void*)));
+        kRuntimePointerSize));
   }
 
   void AssertField(mirror::Class* klass, ArtField* field)
@@ -275,7 +276,7 @@
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
       // Check that all direct methods are static (either <clinit> or a regular static method).
-      for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+      for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
         EXPECT_TRUE(m.IsStatic());
         EXPECT_TRUE(m.IsDirect());
       }
@@ -312,19 +313,19 @@
     EXPECT_FALSE(klass->IsPrimitive());
     EXPECT_TRUE(klass->CanAccess(klass.Get()));
 
-    for (ArtMethod& method : klass->GetDirectMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDirectMethods(kRuntimePointerSize)) {
       AssertMethod(&method);
       EXPECT_TRUE(method.IsDirect());
       EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
     }
 
-    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
       EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
     }
 
-    for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetCopiedMethods(kRuntimePointerSize)) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
       EXPECT_TRUE(method.IsCopied());
@@ -435,7 +436,7 @@
     auto* resolved_methods = dex_cache->GetResolvedMethods();
     for (size_t i = 0, num_methods = dex_cache->NumResolvedMethods(); i != num_methods; ++i) {
       EXPECT_TRUE(
-          mirror::DexCache::GetElementPtrSize(resolved_methods, i, sizeof(void*)) != nullptr)
+          mirror::DexCache::GetElementPtrSize(resolved_methods, i, kRuntimePointerSize) != nullptr)
           << dex.GetLocation() << " i=" << i;
     }
   }
@@ -565,7 +566,6 @@
 struct ClassOffsets : public CheckOffsets<mirror::Class> {
   ClassOffsets() : CheckOffsets<mirror::Class>(false, "Ljava/lang/Class;") {
     addOffset(OFFSETOF_MEMBER(mirror::Class, access_flags_), "accessFlags");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, annotation_type_), "annotationType");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_flags_), "classFlags");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_loader_), "classLoader");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_size_), "classSize");
@@ -929,7 +929,7 @@
   // Static final primitives that are initialized by a compile-time constant
   // expression resolve to a copy of a constant value from the constant pool.
   // So <clinit> should be null.
-  ArtMethod* clinit = statics->FindDirectMethod("<clinit>", "()V", sizeof(void*));
+  ArtMethod* clinit = statics->FindDirectMethod("<clinit>", "()V", kRuntimePointerSize);
   EXPECT_TRUE(clinit == nullptr);
 
   EXPECT_EQ(9U, statics->NumStaticFields());
@@ -1016,15 +1016,15 @@
   EXPECT_TRUE(J->IsAssignableFrom(B.Get()));
 
   const Signature void_sig = I->GetDexCache()->GetDexFile()->CreateSignature("()V");
-  ArtMethod* Ii = I->FindVirtualMethod("i", void_sig, sizeof(void*));
-  ArtMethod* Jj1 = J->FindVirtualMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Jj2 = J->FindVirtualMethod("j2", void_sig, sizeof(void*));
-  ArtMethod* Kj1 = K->FindInterfaceMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Kj2 = K->FindInterfaceMethod("j2", void_sig, sizeof(void*));
-  ArtMethod* Kk = K->FindInterfaceMethod("k", void_sig, sizeof(void*));
-  ArtMethod* Ai = A->FindVirtualMethod("i", void_sig, sizeof(void*));
-  ArtMethod* Aj1 = A->FindVirtualMethod("j1", void_sig, sizeof(void*));
-  ArtMethod* Aj2 = A->FindVirtualMethod("j2", void_sig, sizeof(void*));
+  ArtMethod* Ii = I->FindVirtualMethod("i", void_sig, kRuntimePointerSize);
+  ArtMethod* Jj1 = J->FindVirtualMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Jj2 = J->FindVirtualMethod("j2", void_sig, kRuntimePointerSize);
+  ArtMethod* Kj1 = K->FindInterfaceMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Kj2 = K->FindInterfaceMethod("j2", void_sig, kRuntimePointerSize);
+  ArtMethod* Kk = K->FindInterfaceMethod("k", void_sig, kRuntimePointerSize);
+  ArtMethod* Ai = A->FindVirtualMethod("i", void_sig, kRuntimePointerSize);
+  ArtMethod* Aj1 = A->FindVirtualMethod("j1", void_sig, kRuntimePointerSize);
+  ArtMethod* Aj2 = A->FindVirtualMethod("j2", void_sig, kRuntimePointerSize);
   ASSERT_TRUE(Ii != nullptr);
   ASSERT_TRUE(Jj1 != nullptr);
   ASSERT_TRUE(Jj2 != nullptr);
@@ -1039,12 +1039,12 @@
   EXPECT_NE(Jj2, Aj2);
   EXPECT_EQ(Kj1, Jj1);
   EXPECT_EQ(Kj2, Jj2);
-  EXPECT_EQ(Ai, A->FindVirtualMethodForInterface(Ii, sizeof(void*)));
-  EXPECT_EQ(Aj1, A->FindVirtualMethodForInterface(Jj1, sizeof(void*)));
-  EXPECT_EQ(Aj2, A->FindVirtualMethodForInterface(Jj2, sizeof(void*)));
-  EXPECT_EQ(Ai, A->FindVirtualMethodForVirtualOrInterface(Ii, sizeof(void*)));
-  EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1, sizeof(void*)));
-  EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2, sizeof(void*)));
+  EXPECT_EQ(Ai, A->FindVirtualMethodForInterface(Ii, kRuntimePointerSize));
+  EXPECT_EQ(Aj1, A->FindVirtualMethodForInterface(Jj1, kRuntimePointerSize));
+  EXPECT_EQ(Aj2, A->FindVirtualMethodForInterface(Jj2, kRuntimePointerSize));
+  EXPECT_EQ(Ai, A->FindVirtualMethodForVirtualOrInterface(Ii, kRuntimePointerSize));
+  EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1, kRuntimePointerSize));
+  EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2, kRuntimePointerSize));
 
   ArtField* Afoo = mirror::Class::FindStaticField(soa.Self(), A, "foo", "Ljava/lang/String;");
   ArtField* Bfoo = mirror::Class::FindStaticField(soa.Self(), B, "foo", "Ljava/lang/String;");
@@ -1069,8 +1069,8 @@
   Handle<mirror::ClassLoader> class_loader(
       hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", class_loader);
-  ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
-  ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", sizeof(void*));
+  ArtMethod* clinit = klass->FindClassInitializer(kRuntimePointerSize);
+  ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", kRuntimePointerSize);
   const DexFile::TypeId* type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = dex_file->GetIndexForTypeId(*type_id);
@@ -1134,19 +1134,19 @@
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Class;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::Class::ClassClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::Class::ClassClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Object;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::Object::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::Object::ClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/String;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::String::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::String::ClassSize(kRuntimePointerSize));
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/DexCache;", class_loader);
   ASSERT_TRUE(c != nullptr);
-  EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize(sizeof(void*)));
+  EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize(kRuntimePointerSize));
 }
 
 static void CheckMethod(ArtMethod* method, bool verified)
@@ -1161,7 +1161,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   EXPECT_EQ((c->GetAccessFlags() & kAccVerificationAttempted) != 0U, preverified)
       << "Class " << PrettyClass(c) << " not as expected";
-  for (auto& m : c->GetMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     CheckMethod(&m, preverified);
   }
 }
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 3a1b650..741b682 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -284,7 +284,8 @@
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   MemMap::Init();
-  if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
+  static constexpr bool kVerifyChecksum = true;
+  if (!DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files)) {
     LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
     UNREACHABLE();
   } else {
@@ -480,9 +481,11 @@
 std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTestImpl::OpenTestDexFiles(
     const char* name) {
   std::string filename = GetTestDexFileName(name);
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
+  bool success = DexFile::Open(
+      filename.c_str(), filename.c_str(), kVerifyChecksum, &error_msg, &dex_files);
   CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
   for (auto& dex_file : dex_files) {
     CHECK_EQ(PROT_READ, dex_file->GetPermissions());
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index b68eb19..f445e52 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -141,11 +141,13 @@
 
   std::unique_ptr<CompilerCallbacks> callbacks_;
 
-  void SetUp();
+  virtual void SetUp();
 
-  void TearDown();
+  virtual void TearDown();
 
-  void FinalizeSetup();
+  // Called to finish up runtime creation and fill in test fields. By default this runs root
+  // initializers, initializes well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup();
 
  private:
   static std::string GetCoreFileLocation(const char* suffix);
@@ -160,19 +162,13 @@
   virtual ~CommonRuntimeTestBase() {}
 
  protected:
-  virtual void SetUp() {
+  virtual void SetUp() OVERRIDE {
     CommonRuntimeTestImpl::SetUp();
   }
 
-  virtual void TearDown() {
+  virtual void TearDown() OVERRIDE {
     CommonRuntimeTestImpl::TearDown();
   }
-
-  // Called to finish up runtime creation and filling test fields. By default runs root
-  // initializers, initialize well-known classes, and creates the heap thread pool.
-  virtual void FinalizeSetup() {
-    CommonRuntimeTestImpl::FinalizeSetup();
-  }
 };
 
 using CommonRuntimeTest = CommonRuntimeTestBase<testing::Test>;
@@ -205,6 +201,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_READ_BARRIER_ON_X86() \
+  if (kUseReadBarrier && kRuntimeISA == kX86) { \
+    printf("WARNING: TEST DISABLED FOR READ BARRIER ON X86\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index a8d0f3a..99732c6 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -150,7 +150,8 @@
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/ClassFormatError;", referrer, fmt, &args);
-  va_end(args);}
+  va_end(args);
+}
 
 // IllegalAccessError
 
@@ -401,12 +402,117 @@
                                                dex_file, type);
 }
 
-void ThrowNullPointerExceptionFromDexPC() {
+static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (!CanDoImplicitNullCheckOn(addr)) {
+    return false;
+  }
+
+  switch (instr.Opcode()) {
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      // Without inlining, we could just check that the offset is the class offset.
+      // However, when inlining, the compiler can (validly) merge the null check with a field access
+      // on the same object. Note that the stack map at the NPE will reflect the invoke's location,
+      // which is the caller.
+      return true;
+    }
+
+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT:
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      ArtField* field =
+          Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false);
+      return (addr == 0) ||
+          (addr == field->GetOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT_QUICK:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT_QUICK:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK: {
+      return (addr == 0u) ||
+          (addr == instr.VRegC_22c()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::AGET:
+    case Instruction::AGET_WIDE:
+    case Instruction::AGET_OBJECT:
+    case Instruction::AGET_BOOLEAN:
+    case Instruction::AGET_BYTE:
+    case Instruction::AGET_CHAR:
+    case Instruction::AGET_SHORT:
+    case Instruction::APUT:
+    case Instruction::APUT_WIDE:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT:
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::ARRAY_LENGTH: {
+      // The length access should crash. We currently do not do implicit checks on
+      // the array access itself.
+      return (addr == 0u) ||
+          (addr == mirror::Array::LengthOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    default: {
+      // We have covered all the cases where an NPE could occur.
+      // Note that this must be kept in sync with the compiler, and adding
+      // any new way to do implicit checks in the compiler should also update
+      // this code.
+      return false;
+    }
+  }
+}
+
+void ThrowNullPointerExceptionFromDexPC(bool check_address, uintptr_t addr) {
   uint32_t throw_dex_pc;
   ArtMethod* method = Thread::Current()->GetCurrentMethod(&throw_dex_pc);
   const DexFile::CodeItem* code = method->GetCodeItem();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
+  if (check_address && !IsValidImplicitCheck(addr, method, *instr)) {
+    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    LOG(FATAL) << "Invalid address for an implicit NullPointerException check: "
+               << "0x" << std::hex << addr << std::dec
+               << ", at "
+               << instr->DumpString(dex_file)
+               << " in "
+               << PrettyMethod(method);
+  }
+
   switch (instr->Opcode()) {
     case Instruction::INVOKE_DIRECT:
       ThrowNullPointerExceptionForMethodAccess(instr->VRegB_35c(), kDirect);
@@ -529,14 +635,24 @@
       ThrowException("Ljava/lang/NullPointerException;", nullptr,
                      "Attempt to get length of null array");
       break;
+    case Instruction::FILL_ARRAY_DATA: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to write to null array");
+      break;
+    }
+    case Instruction::MONITOR_ENTER:
+    case Instruction::MONITOR_EXIT: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to do a synchronize operation on a null object");
+      break;
+    }
     default: {
-      // TODO: We should have covered all the cases where we expect a NPE above, this
-      //       message/logging is so we can improve any cases we've missed in the future.
       const DexFile* dex_file =
           method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-      ThrowException("Ljava/lang/NullPointerException;", nullptr,
-                     StringPrintf("Null pointer exception during instruction '%s'",
-                                  instr->DumpString(dex_file).c_str()).c_str());
+      LOG(FATAL) << "NullPointerException at an unexpected instruction: "
+                 << instr->DumpString(dex_file)
+                 << " in "
+                 << PrettyMethod(method);
       break;
     }
   }
@@ -653,6 +769,13 @@
   }
 }
 
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length) {
+  ThrowException("Ljava/lang/StringIndexOutOfBoundsException;", nullptr,
+                 StringPrintf("length=%d; index=%d", length, index).c_str());
+}
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
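
IsValidImplicitCheck above vets a SIGSEGV address against the faulting instruction: an implicit null check dereferences a null base at some small offset, so the fault address must be zero, the field offset (or the monitor word under read barriers), or a similarly small constant. A toy version of the address test; kMaxOffsetForImplicitCheck is an assumed stand-in for whatever limit CanDoImplicitNullCheckOn enforces:

    #include <cstdint>

    // Assumed limit; the real bound is target-specific (roughly one unmapped page).
    constexpr uintptr_t kMaxOffsetForImplicitCheck = 4096;

    // Sketch: with a null base, `base + offset` faults at `offset` itself, so a
    // fault address is only plausible for an implicit null check if it is small
    // and matches the offset the faulting instruction actually used.
    constexpr bool PlausibleImplicitNullCheck(uintptr_t fault_address, uint32_t field_offset) {
      return fault_address < kMaxOffsetForImplicitCheck &&
             (fault_address == 0u || fault_address == field_offset);
    }
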
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index c3a1f09..cbd338d 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -195,7 +195,7 @@
                                               InvokeType type)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowNullPointerExceptionFromDexPC()
+void ThrowNullPointerExceptionFromDexPC(bool check_address = false, uintptr_t addr = 0)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNullPointerException(const char* msg)
@@ -211,6 +211,11 @@
 
 void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index 65d1d98..37631cc 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -44,6 +44,8 @@
     kEverything,          // Compile everything capable of being compiled.
   };
 
+  static const Filter kDefaultCompilerFilter = kSpeed;
+
   // Returns true if an oat file with this compiler filter contains
   // compiled executable code for bytecode.
   static bool IsBytecodeCompilationEnabled(Filter filter);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1d93580..89bebb4 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -23,6 +23,7 @@
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -79,7 +80,7 @@
     mirror::Class* declaring_class = m->GetDeclaringClass();
     return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
                                                       m->GetDexMethodIndex(),
-                                                      sizeof(void*));
+                                                      kRuntimePointerSize);
   }
 }
 
@@ -644,8 +645,7 @@
 
   LOG(INFO) << "Debugger is no longer active";
 
-  // Suspend all threads and exclusively acquire the mutator lock. Set the state of the thread
-  // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
+  // Suspend all threads and exclusively acquire the mutator lock. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
   Thread* self = Thread::Current();
@@ -655,7 +655,6 @@
                                     gc::kGcCauseInstrumentation,
                                     gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
-    ThreadState old_state = self->SetStateUnsafe(kRunnable);
     // Debugger may not be active at this point.
     if (IsDebuggerActive()) {
       {
@@ -676,7 +675,6 @@
       }
       gDebuggerActive = false;
     }
-    CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   }
 
   {
@@ -1409,7 +1407,7 @@
   if (m == nullptr) {
     return "null";
   }
-  return m->GetInterfaceMethodIfProxy(sizeof(void*))->GetName();
+  return m->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName();
 }
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id) {
@@ -1456,6 +1454,15 @@
   }
 }
 
+static size_t GetMethodNumArgRegistersIncludingThis(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t num_registers = ArtMethod::NumArgRegisters(method->GetShorty());
+  if (!method->IsStatic()) {
+    ++num_registers;
+  }
+  return num_registers;
+}
+
 /*
  * Circularly shifts registers so that arguments come last. Reverts
  * slots to dex style argument placement.
@@ -1467,7 +1474,7 @@
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
     LOG(WARNING) << "Trying to demangle slot for method without code " << PrettyMethod(m);
-    uint16_t vreg_count = ArtMethod::NumArgRegisters(m->GetShorty());
+    uint16_t vreg_count = GetMethodNumArgRegistersIncludingThis(m);
     if (slot < vreg_count) {
       *error = JDWP::ERR_NONE;
       return slot;
@@ -1529,9 +1536,9 @@
   auto ptr_size = cl->GetImagePointerSize();
   for (ArtMethod& m : c->GetMethods(ptr_size)) {
     expandBufAddMethodId(pReply, ToMethodId(&m));
-    expandBufAddUtf8String(pReply, m.GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
-    expandBufAddUtf8String(pReply,
-                           m.GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
+    expandBufAddUtf8String(pReply, m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName());
+    expandBufAddUtf8String(
+        pReply, m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetSignature().ToString());
     if (with_generic) {
       const char* generic_signature = "";
       expandBufAddUtf8String(pReply, generic_signature);
@@ -1639,8 +1646,7 @@
 
   // arg_count considers doubles and longs to take 2 units.
   // variable_count considers everything to take 1 unit.
-  std::string shorty(m->GetShorty());
-  expandBufAdd4BE(pReply, ArtMethod::NumArgRegisters(shorty));
+  expandBufAdd4BE(pReply, GetMethodNumArgRegistersIncludingThis(m));
 
   // We don't know the total number of variables yet, so leave a blank and update it later.
   size_t variable_count_offset = expandBufGetLength(pReply);
@@ -2362,6 +2368,10 @@
 }
 
 void Dbg::SuspendVM() {
+  // Avoid a deadlock between GC and debugger where GC gets suspended during GC. b/25800335.
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseDebugger,
+                                  gc::kCollectorTypeDebugger);
   Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
 }
 
@@ -3933,7 +3943,7 @@
           mirror::Class* parameter_type =
               m->GetClassFromTypeIndex(types->GetTypeItem(i).type_idx_,
                                        true /* resolve */,
-                                       sizeof(void*));
+                                       kRuntimePointerSize);
           mirror::Object* argument = gRegistry->Get<mirror::Object*>(arg_values[i], &error);
           if (error != JDWP::ERR_NONE) {
             return JDWP::ERR_INVALID_OBJECT;
@@ -4024,7 +4034,7 @@
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
   ArtMethod* m = pReq->method;
-  size_t image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver.Read() != nullptr) {
     ArtMethod* actual_method =
         pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size);
@@ -4108,6 +4118,8 @@
   // Suspend other threads if the invoke is not single-threaded.
   if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
     ScopedThreadSuspension sts(soa.Self(), kWaitingForDebuggerSuspension);
+    // Avoid a deadlock between GC and debugger where GC gets suspended during GC. b/25800335.
+    gc::ScopedGCCriticalSection gcs(soa.Self(), gc::kGcCauseDebugger, gc::kCollectorTypeDebugger);
     VLOG(jdwp) << "      Suspending all threads";
     Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
   }
@@ -5072,7 +5084,7 @@
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   BufferedRootVisitor<128> root_visitor(visitor, RootInfo(kRootVMInternal));
   for (Breakpoint& breakpoint : gBreakpoints) {
-    breakpoint.Method()->VisitRoots(root_visitor, sizeof(void*));
+    breakpoint.Method()->VisitRoots(root_visitor, kRuntimePointerSize);
   }
 }
 
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index 801d857..d717ec0 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -67,14 +67,15 @@
 
     // GetMultiDexSrc2 should have the same primary dex checksum as
     // GetMultiDexSrc1, but a different secondary dex checksum.
+    static constexpr bool kVerifyChecksum = true;
     std::vector<std::unique_ptr<const DexFile>> multi1;
     ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
-          GetMultiDexSrc1().c_str(), &error_msg, &multi1)) << error_msg;
+          GetMultiDexSrc1().c_str(), kVerifyChecksum, &error_msg, &multi1)) << error_msg;
     ASSERT_GT(multi1.size(), 1u);
 
     std::vector<std::unique_ptr<const DexFile>> multi2;
     ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
-          GetMultiDexSrc2().c_str(), &error_msg, &multi2)) << error_msg;
+          GetMultiDexSrc2().c_str(), kVerifyChecksum, &error_msg, &multi2)) << error_msg;
     ASSERT_GT(multi2.size(), 1u);
 
     ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
@@ -136,7 +137,20 @@
   }
 
   bool GetCachedImageFile(/*out*/std::string* image, std::string* error_msg) const {
-    std::string cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA), true);
+    std::string cache;
+    bool have_android_data;
+    bool dalvik_cache_exists;
+    bool is_global_cache;
+    GetDalvikCache(GetInstructionSetString(kRuntimeISA),
+                   true,
+                   &cache,
+                   &have_android_data,
+                   &dalvik_cache_exists,
+                   &is_global_cache);
+    if (!dalvik_cache_exists) {
+      *error_msg = "Failed to create dalvik cache";
+      return false;
+    }
     return GetDalvikCacheFilename(GetImageLocation().c_str(), cache.c_str(), image, error_msg);
   }
 
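GetCachedImageFile() above switches from the string-returning GetDalvikCache() overload to the out-parameter variant and bails out when the cache directory is missing. A minimal sketch of that calling pattern, with hypothetical helpers rather than the real runtime API:

#include <string>

// Hypothetical out-parameter helper mirroring the GetDalvikCache() shape used above.
void GetCacheDir(const std::string& isa,
                 bool create_if_absent,
                 std::string* cache_dir,
                 bool* cache_exists) {
  *cache_dir = "/data/dalvik-cache/" + isa;
  *cache_exists = create_if_absent;  // Placeholder: the real code stats/creates the directory.
}

bool GetCachedImage(std::string* image, std::string* error_msg) {
  std::string cache;
  bool cache_exists = false;
  GetCacheDir("arm64", /* create_if_absent */ true, &cache, &cache_exists);
  if (!cache_exists) {
    *error_msg = "Failed to create dalvik cache";
    return false;  // Callers must check the out-flag before using the path.
  }
  *image = cache + "/boot.art";  // Placeholder for GetDalvikCacheFilename().
  return true;
}
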
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 4e6c3ca..108a5af 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -38,10 +38,88 @@
   return reinterpret_cast<const char*>(ptr);
 }
 
+inline const char* DexFile::GetStringData(const StringId& string_id) const {
+  uint32_t ignored;
+  return GetStringDataAndUtf16Length(string_id, &ignored);
+}
+
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+                                                          uint32_t* utf16_length) const {
+  if (idx == kDexNoIndex) {
+    *utf16_length = 0;
+    return nullptr;
+  }
+  const StringId& string_id = GetStringId(idx);
+  return GetStringDataAndUtf16Length(string_id, utf16_length);
+}
+
+inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+  uint32_t unicode_length;
+  return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetTypeDescriptor(const TypeId& type_id) const {
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetFieldTypeDescriptor(const FieldId& field_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
+inline const char* DexFile::GetFieldName(const FieldId& field_id) const {
+  return StringDataByIdx(field_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
 inline const Signature DexFile::GetMethodSignature(const MethodId& method_id) const {
   return Signature(this, GetProtoId(method_id.proto_idx_));
 }
 
+inline const char* DexFile::GetMethodName(const MethodId& method_id) const {
+  return StringDataByIdx(method_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(uint32_t idx) const {
+  return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id) const {
+  return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
+  // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
+  return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
+}
+
+inline const char* DexFile::GetClassDescriptor(const ClassDef& class_def) const {
+  return StringByTypeIdx(class_def.class_idx_);
+}
+
+inline const char* DexFile::GetReturnTypeDescriptor(const ProtoId& proto_id) const {
+  return StringByTypeIdx(proto_id.return_type_idx_);
+}
+
+inline const char* DexFile::GetShorty(uint32_t proto_idx) const {
+  const ProtoId& proto_id = GetProtoId(proto_idx);
+  return StringDataByIdx(proto_id.shorty_idx_);
+}
+
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
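The block above moves a batch of small DexFile accessors out of the class body in dex_file.h and into dex_file-inl.h as inline definitions; behavior is unchanged. A standalone sketch of the declaration/-inl.h split, using a hypothetical class:

#include <cstdint>

// foo.h: the class body only declares the accessor.
class Foo {
 public:
  const char* GetName(uint32_t idx) const;  // Declaration only; no body in the class.
 private:
  const char* const* names_ = nullptr;
};

// foo-inl.h: the inline body lives here, so files that never call GetName()
// do not pay for (or depend on) its definition.
inline const char* Foo::GetName(uint32_t idx) const {
  return names_[idx];
}
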
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 88696e9..90c678c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,12 +29,14 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/file_magic.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "base/systrace.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_file_verifier.h"
@@ -54,11 +56,6 @@
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
 namespace art {
 
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
@@ -66,7 +63,9 @@
   {'0', '3', '5', '\0'},
   // Dex version 036 skipped because of an old dalvik bug on some versions of android where dex
   // files with that version number would erroneously be accepted and run.
-  {'0', '3', '7', '\0'}
+  {'0', '3', '7', '\0'},
+  // Dex version 038: Android "O" and beyond.
+  {'0', '3', '8', '\0'}
 };
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
@@ -85,14 +84,14 @@
     DCHECK_EQ(zip_entry_name[-1], kMultiDexSeparator);
   }
 
-  ScopedFd fd(OpenAndReadMagic(file_part, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(file_part, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
     std::unique_ptr<ZipArchive> zip_archive(
-        ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
+        ZipArchive::OpenFromFd(fd.Release(), filename, error_msg));
     if (zip_archive.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open zip archive '%s' (error msg: %s)", file_part,
                                 error_msg->c_str());
@@ -109,7 +108,7 @@
   }
   if (IsDexMagic(magic)) {
     std::unique_ptr<const DexFile> dex_file(
-        DexFile::OpenFile(fd.release(), filename, false, error_msg));
+        DexFile::OpenFile(fd.Release(), filename, false, false, error_msg));
     if (dex_file.get() == nullptr) {
       return false;
     }
@@ -120,21 +119,27 @@
   return false;
 }
 
-bool DexFile::Open(const char* filename, const char* location, std::string* error_msg,
+bool DexFile::Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace(std::string("Open dex file ") + location);
   DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is nullptr";
   uint32_t magic;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd.release(), location, error_msg, dex_files);
+    return DexFile::OpenZip(fd.Release(), location, verify_checksum, error_msg, dex_files);
   }
   if (IsDexMagic(magic)) {
-    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), location, true,
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.Release(),
+                                                              location,
+                                                              /* verify */ true,
+                                                              verify_checksum,
                                                               error_msg));
     if (dex_file.get() != nullptr) {
       dex_files->push_back(std::move(dex_file));
@@ -160,12 +165,12 @@
 bool DexFile::MaybeDex(const char* filename) {
   uint32_t magic;
   std::string error_msg;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
     return false;
   }
   if (IsZipMagic(magic)) {
-    return ContainsClassesDex(fd.release(), filename);
+    return ContainsClassesDex(fd.Release(), filename);
   } else if (IsDexMagic(magic)) {
     return true;
   }
@@ -207,6 +212,7 @@
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
                                              bool verify,
+                                             bool verify_checksum,
                                              std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file from RAM ") + location);
   std::unique_ptr<const DexFile> dex_file = OpenMemory(base,
@@ -220,6 +226,7 @@
                                          dex_file->Begin(),
                                          dex_file->Size(),
                                          location.c_str(),
+                                         verify_checksum,
                                          error_msg)) {
     return nullptr;
   }
@@ -227,13 +234,16 @@
   return dex_file;
 }
 
-std::unique_ptr<const DexFile> DexFile::OpenFile(int fd, const char* location, bool verify,
+std::unique_ptr<const DexFile> DexFile::OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
                                                  std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file ") + location);
   CHECK(location != nullptr);
   std::unique_ptr<MemMap> map;
   {
-    ScopedFd delayed_close(fd);
+    File delayed_close(fd, /* check_usage */ false);
     struct stat sbuf;
     memset(&sbuf, 0, sizeof(sbuf));
     if (fstat(fd, &sbuf) == -1) {
@@ -276,7 +286,9 @@
   }
 
   if (verify && !DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                                         location, error_msg)) {
+                                         location,
+                                         verify_checksum,
+                                         error_msg)) {
     return nullptr;
   }
 
@@ -285,7 +297,10 @@
 
 const char* DexFile::kClassesDex = "classes.dex";
 
-bool DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg,
+bool DexFile::OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace("Dex file open Zip " + std::string(location));
   DCHECK(dex_files != nullptr) << "DexFile::OpenZip: out-param is nullptr";
@@ -294,7 +309,7 @@
     DCHECK(!error_msg->empty());
     return false;
   }
-  return DexFile::OpenFromZip(*zip_archive, location, error_msg, dex_files);
+  return DexFile::OpenFromZip(*zip_archive, location, verify_checksum, error_msg, dex_files);
 }
 
 std::unique_ptr<const DexFile> DexFile::OpenMemory(const std::string& location,
@@ -310,8 +325,11 @@
                     error_msg);
 }
 
-std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code) {
   ScopedTrace trace("Dex file open from Zip Archive " + std::string(location));
   CHECK(!location.empty());
@@ -320,6 +338,11 @@
     *error_code = ZipOpenErrorCode::kEntryNotFound;
     return nullptr;
   }
+  if (zip_entry->GetUncompressedLength() == 0) {
+    *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str());
+    *error_code = ZipOpenErrorCode::kDexFileError;
+    return nullptr;
+  }
   std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg));
   if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
@@ -342,7 +365,9 @@
   }
   CHECK(dex_file->IsReadOnly()) << location;
   if (!DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                               location.c_str(), error_msg)) {
+                               location.c_str(),
+                               verify_checksum,
+                               error_msg)) {
     *error_code = ZipOpenErrorCode::kVerifyError;
     return nullptr;
   }
@@ -356,14 +381,16 @@
 // seems an excessive number.
 static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
 
-bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+bool DexFile::OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace("Dex file open from Zip " + std::string(location));
   DCHECK(dex_files != nullptr) << "DexFile::OpenFromZip: out-param is nullptr";
   ZipOpenErrorCode error_code;
-  std::unique_ptr<const DexFile> dex_file(Open(zip_archive, kClassesDex, location, error_msg,
-                                               &error_code));
+  std::unique_ptr<const DexFile> dex_file(
+      Open(zip_archive, kClassesDex, location, verify_checksum, error_msg, &error_code));
   if (dex_file.get() == nullptr) {
     return false;
   } else {
@@ -378,8 +405,8 @@
     for (size_t i = 1; ; ++i) {
       std::string name = GetMultiDexClassesDexName(i);
       std::string fake_location = GetMultiDexLocation(i, location.c_str());
-      std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
-                                                        error_msg, &error_code));
+      std::unique_ptr<const DexFile> next_dex_file(
+          Open(zip_archive, name.c_str(), fake_location, verify_checksum, error_msg, &error_code));
       if (next_dex_file.get() == nullptr) {
         if (error_code != ZipOpenErrorCode::kEntryNotFound) {
           LOG(WARNING) << error_msg;
@@ -413,6 +440,8 @@
                                                    MemMap* mem_map,
                                                    const OatDexFile* oat_dex_file,
                                                    std::string* error_msg) {
+  DCHECK(base != nullptr);
+  DCHECK_NE(size, 0U);
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
   std::unique_ptr<DexFile> dex_file(
       new DexFile(base, size, location, location_checksum, mem_map, oat_dex_file));
@@ -518,7 +547,7 @@
     return (class_def_idx != DexFile::kDexNoIndex) ? &GetClassDef(class_def_idx) : nullptr;
   }
 
-  // Fast path for rate no class defs case.
+  // Fast path for rare no class defs case.
   const uint32_t num_class_defs = NumClassDefs();
   if (num_class_defs == 0) {
     return nullptr;
@@ -548,8 +577,8 @@
 }
 
 const DexFile::FieldId* DexFile::FindFieldId(const DexFile::TypeId& declaring_klass,
-                                              const DexFile::StringId& name,
-                                              const DexFile::TypeId& type) const {
+                                             const DexFile::StringId& name,
+                                             const DexFile::TypeId& type) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
   const uint16_t class_idx = GetIndexForTypeId(declaring_klass);
   const uint32_t name_idx = GetIndexForStringId(name);
@@ -1309,7 +1338,7 @@
   AnnotationValue annotation_value;
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::Class> h_klass(hs.NewHandle(klass));
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   Handle<mirror::Class> return_type(hs.NewHandle(
       method->GetReturnType(true /* resolve */, pointer_size)));
   if (!ProcessAnnotationValue(h_klass, &annotation, &annotation_value, return_type, kAllObjects)) {
@@ -1374,7 +1403,9 @@
   return GetSignatureValue(method_class, annotation_set);
 }
 
-bool DexFile::IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class)
+bool DexFile::IsMethodAnnotationPresent(ArtMethod* method,
+                                        Handle<mirror::Class> annotation_class,
+                                        uint32_t visibility /* = kDexVisibilityRuntime */)
     const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
   if (annotation_set == nullptr) {
@@ -1382,8 +1413,10 @@
   }
   StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
-      method_class, annotation_set, kDexVisibilityRuntime, annotation_class);
+  const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(method_class,
+                                                                             annotation_set,
+                                                                             visibility,
+                                                                             annotation_class);
   return annotation_item != nullptr;
 }
 
@@ -1601,12 +1634,12 @@
   Handle<mirror::String> string_name(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self, name)));
 
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   ArtMethod* annotation_method =
-      annotation_class->FindDeclaredVirtualMethodByName(name, sizeof(void*));
+      annotation_class->FindDeclaredVirtualMethodByName(name, pointer_size);
   if (annotation_method == nullptr) {
     return nullptr;
   }
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   Handle<mirror::Class> method_return(hs.NewHandle(
       annotation_method->GetReturnType(true /* resolve */, pointer_size)));
 
@@ -1619,8 +1652,16 @@
   mirror::Class* annotation_member_class =
       WellKnownClasses::ToClass(WellKnownClasses::libcore_reflect_AnnotationMember);
   Handle<mirror::Object> new_member(hs.NewHandle(annotation_member_class->AllocObject(self)));
-  Handle<mirror::Method> method_object(
-      hs.NewHandle(mirror::Method::CreateFromArtMethod(self, annotation_method)));
+  mirror::Method* method_obj_ptr;
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  if (pointer_size == PointerSize::k64) {
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<PointerSize::k64, false>(
+        self, annotation_method);
+  } else {
+    method_obj_ptr = mirror::Method::CreateFromArtMethod<PointerSize::k32, false>(
+        self, annotation_method);
+  }
+  Handle<mirror::Method> method_object(hs.NewHandle(method_obj_ptr));
 
   if (new_member.Get() == nullptr || string_name.Get() == nullptr ||
       method_object.Get() == nullptr || method_return.Get() == nullptr) {
@@ -1928,16 +1969,31 @@
         StackHandleScope<2> hs(self);
         Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
-        ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
+        ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
             klass->GetDexFile(), index, dex_cache, class_loader);
         if (method == nullptr) {
           return false;
         }
+        PointerSize pointer_size = class_linker->GetImagePointerSize();
         set_object = true;
+        DCHECK(!Runtime::Current()->IsActiveTransaction());
         if (method->IsConstructor()) {
-          element_object = mirror::Constructor::CreateFromArtMethod(self, method);
+          if (pointer_size == PointerSize::k64) {
+            element_object = mirror::Constructor::CreateFromArtMethod<PointerSize::k64,
+                                                                      false>(self, method);
+          } else {
+            element_object = mirror::Constructor::CreateFromArtMethod<PointerSize::k32,
+                                                                      false>(self, method);
+          }
         } else {
-          element_object = mirror::Method::CreateFromArtMethod(self, method);
+          if (pointer_size == PointerSize::k64) {
+            element_object = mirror::Method::CreateFromArtMethod<PointerSize::k64,
+                                                                 false>(self, method);
+          } else {
+            element_object = mirror::Method::CreateFromArtMethod<PointerSize::k32,
+                                                                 false>(self, method);
+          }
         }
         if (element_object == nullptr) {
           return false;
@@ -1959,7 +2015,12 @@
           return false;
         }
         set_object = true;
-        element_object = mirror::Field::CreateFromArtField(self, field, true);
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        if (pointer_size == PointerSize::k64) {
+          element_object = mirror::Field::CreateFromArtField<PointerSize::k64>(self, field, true);
+        } else {
+          element_object = mirror::Field::CreateFromArtField<PointerSize::k32>(self, field, true);
+        }
         if (element_object == nullptr) {
           return false;
         }
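Several call sites above replace the plain CreateFromArtMethod()/CreateFromArtField() calls with explicit PointerSize::k32/k64 template instantiations chosen at runtime from the class linker's image pointer size. A minimal standalone sketch of that runtime-to-template dispatch (hypothetical types, not the mirror:: API):

enum class PointerSize { k32 = 4, k64 = 8 };

// Templated on the image pointer size, as CreateFromArtMethod is above.
template <PointerSize kPointerSize, bool kTransactionActive>
void* CreateFromMethod(void* method) {
  // A real implementation would read fields at kPointerSize-dependent offsets.
  return method;
}

// The size is only known at runtime, so the call site branches once and
// instantiates both variants.
void* Dispatch(PointerSize pointer_size, void* method) {
  if (pointer_size == PointerSize::k64) {
    return CreateFromMethod<PointerSize::k64, /* kTransactionActive */ false>(method);
  }
  return CreateFromMethod<PointerSize::k32, /* kTransactionActive */ false>(method);
}
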
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 638821b..59339ef 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -63,7 +63,7 @@
   static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
 
   static const uint8_t kDexMagic[];
-  static constexpr size_t kNumDexVersions = 2;
+  static constexpr size_t kNumDexVersions = 3;
   static constexpr size_t kDexVersionLen = 4;
   static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
 
@@ -416,7 +416,10 @@
   static bool GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg);
 
   // Opens .dex files found in the container, guessing the container format based on file extension.
-  static bool Open(const char* filename, const char* location, std::string* error_msg,
+  static bool Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry.
@@ -429,10 +432,13 @@
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
                                              bool verify,
+                                             bool verify_checksum,
                                              std::string* error_msg);
 
   // Open all classesXXX.dex files from a zip archive.
-  static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+  static bool OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
@@ -522,25 +528,12 @@
   // as the string length of the string data.
   const char* GetStringDataAndUtf16Length(const StringId& string_id, uint32_t* utf16_length) const;
 
-  const char* GetStringData(const StringId& string_id) const {
-    uint32_t ignored;
-    return GetStringDataAndUtf16Length(string_id, &ignored);
-  }
+  const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const {
-    if (idx == kDexNoIndex) {
-      *utf16_length = 0;
-      return nullptr;
-    }
-    const StringId& string_id = GetStringId(idx);
-    return GetStringDataAndUtf16Length(string_id, utf16_length);
-  }
+  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const {
-    uint32_t unicode_length;
-    return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
-  }
+  const char* StringDataByIdx(uint32_t idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
@@ -571,20 +564,12 @@
   }
 
   // Get the descriptor string associated with a given type index.
-  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
-  }
+  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const;
 
-  const char* StringByTypeIdx(uint32_t idx) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* StringByTypeIdx(uint32_t idx) const;
 
   // Returns the type descriptor string of a type id.
-  const char* GetTypeDescriptor(const TypeId& type_id) const {
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
   const TypeId* FindTypeId(uint32_t string_idx) const;
@@ -619,15 +604,10 @@
   }
 
   // Returns the class descriptor string of a field id.
-  const char* GetFieldTypeDescriptor(const FieldId& field_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetFieldTypeDescriptor(const FieldId& field_id) const;
 
   // Returns the name of a field id.
-  const char* GetFieldName(const FieldId& field_id) const {
-    return StringDataByIdx(field_id.name_idx_);
-  }
+  const char* GetFieldName(const FieldId& field_id) const;
 
   // Returns the number of method identifiers in the .dex file.
   size_t NumMethodIds() const {
@@ -653,10 +633,7 @@
                                const DexFile::ProtoId& signature) const;
 
   // Returns the declaring class descriptor string of a method id.
-  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const;
 
   // Returns the prototype of a method id.
   const ProtoId& GetMethodPrototype(const MethodId& method_id) const {
@@ -667,23 +644,15 @@
   const Signature GetMethodSignature(const MethodId& method_id) const;
 
   // Returns the name of a method id.
-  const char* GetMethodName(const MethodId& method_id) const {
-    return StringDataByIdx(method_id.name_idx_);
-  }
+  const char* GetMethodName(const MethodId& method_id) const;
 
   // Returns the shorty of a method by its index.
-  const char* GetMethodShorty(uint32_t idx) const {
-    return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
-  }
+  const char* GetMethodShorty(uint32_t idx) const;
 
   // Returns the shorty of a method id.
-  const char* GetMethodShorty(const MethodId& method_id) const {
-    return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
-  }
-  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
-    // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
-    return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
-  }
+  const char* GetMethodShorty(const MethodId& method_id) const;
+  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const;
+
   // Returns the number of class definitions in the .dex file.
   uint32_t NumClassDefs() const {
     DCHECK(header_ != nullptr) << GetLocation();
@@ -703,9 +672,7 @@
   }
 
   // Returns the class descriptor string of a class definition.
-  const char* GetClassDescriptor(const ClassDef& class_def) const {
-    return StringByTypeIdx(class_def.class_idx_);
-  }
+  const char* GetClassDescriptor(const ClassDef& class_def) const;
 
   // Looks up a class definition by its class descriptor. Hash must be
   // ComputeModifiedUtf8Hash(descriptor).
@@ -743,9 +710,7 @@
     }
   }
 
-  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
-    return StringByTypeIdx(proto_id.return_type_idx_);
-  }
+  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const;
 
   // Returns the number of prototype identifiers in the .dex file.
   size_t NumProtoIds() const {
@@ -782,10 +747,7 @@
   const Signature CreateSignature(const StringPiece& signature) const;
 
   // Returns the short form method descriptor for the given prototype.
-  const char* GetShorty(uint32_t proto_idx) const {
-    const ProtoId& proto_id = GetProtoId(proto_idx);
-    return StringDataByIdx(proto_id.shorty_idx_);
-  }
+  const char* GetShorty(uint32_t proto_idx) const;
 
   const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
     if (proto_id.parameters_off_ == 0) {
@@ -998,7 +960,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForMethod(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class) const
+  bool IsMethodAnnotationPresent(ArtMethod* method,
+                                 Handle<mirror::Class> annotation_class,
+                                 uint32_t visibility = kDexVisibilityRuntime) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   const AnnotationSetItem* FindAnnotationSetForClass(Handle<mirror::Class> klass) const
@@ -1177,11 +1141,17 @@
 
  private:
   // Opens a .dex file
-  static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
-                                                 bool verify, std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
+                                                 std::string* error_msg);
 
   // Opens dex files from within a .jar, .zip, or .apk file
-  static bool OpenZip(int fd, const std::string& location, std::string* error_msg,
+  static bool OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   enum class ZipOpenErrorCode {  // private
@@ -1195,8 +1165,11 @@
 
   // Opens .dex file from the entry_name in a zip archive. error_code is undefined when non-null
   // return.
-  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code);
 
   // Opens a .dex file at the given address backed by a MemMap
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 796701d..2704d8a 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -133,8 +133,46 @@
   "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
   "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
 
-static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
-                                                        const char* location) {
+// kRawDex38 and 39 are dex'ed versions of the following Java source:
+//
+// public class Main {
+//     public static void main(String[] foo) {
+//     }
+// }
+//
+// The dex file was manually edited to change its dex version code to 38
+// or 39, respectively.
+static const char kRawDex38[] =
+  "ZGV4CjAzOAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDex39[] =
+  "ZGV4CjAzOQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDexZeroLength[] =
+  "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL"
+  "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA"
+  "AGNsYXNzZXMuZGV4VVQFAANkDaFXdXgLAAEE5AMBAASIEwAAUEsFBgAAAAABAAEAUQAAAEUAAAAA"
+  "AA==";
+
+static void DecodeAndWriteDexFile(const char* base64, const char* location) {
   // decode base64
   CHECK(base64 != nullptr);
   size_t length;
@@ -150,13 +188,18 @@
   if (file->FlushCloseOrErase() != 0) {
     PLOG(FATAL) << "Could not flush and close test file.";
   }
-  file.reset();
+}
+
+static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
+                                                        const char* location) {
+  DecodeAndWriteDexFile(base64, location);
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, &error_msg, &tmp);
+  bool success = DexFile::Open(location, location, kVerifyChecksum, &error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
@@ -196,6 +239,39 @@
   EXPECT_EQ(header.checksum_, raw->GetLocationChecksum());
 }
 
+TEST_F(DexFileTest, Version38Accepted) {
+  ScratchFile tmp;
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex38, tmp.GetFilename().c_str()));
+  ASSERT_TRUE(raw.get() != nullptr);
+
+  const DexFile::Header& header = raw->GetHeader();
+  EXPECT_EQ(38u, header.GetVersion());
+}
+
+TEST_F(DexFileTest, Version39Rejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDex39, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
+TEST_F(DexFileTest, ZeroLengthDexRejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDexZeroLength, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
 TEST_F(DexFileTest, GetLocationChecksum) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main"));
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index d2e84ad..5132efc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -101,36 +101,41 @@
 }
 
 // Helper macro to load string and return false on error.
-#define LOAD_STRING(var, idx, error)                  \
-  const char* var = CheckLoadStringByIdx(idx, error); \
-  if (UNLIKELY(var == nullptr)) {                     \
-    return false;                                     \
+#define LOAD_STRING(var, idx, error)                    \
+  const char* (var) = CheckLoadStringByIdx(idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                     \
+    return false;                                       \
   }
 
 // Helper macro to load string by type idx and return false on error.
-#define LOAD_STRING_BY_TYPE(var, type_idx, error)              \
-  const char* var = CheckLoadStringByTypeIdx(type_idx, error); \
-  if (UNLIKELY(var == nullptr)) {                              \
-    return false;                                              \
+#define LOAD_STRING_BY_TYPE(var, type_idx, error)                \
+  const char* (var) = CheckLoadStringByTypeIdx(type_idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                              \
+    return false;                                                \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
-  const DexFile::MethodId* var  = CheckLoadMethodId(idx, error_string); \
-  if (UNLIKELY(var == nullptr)) {                                       \
-    error_stmt;                                                         \
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                   \
+  const DexFile::MethodId* (var)  = CheckLoadMethodId(idx, error_string); \
+  if (UNLIKELY((var) == nullptr)) {                                       \
+    error_stmt;                                                           \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
-  const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
-  if (UNLIKELY(var == nullptr)) {                           \
-    error_stmt;                                             \
+#define LOAD_FIELD(var, idx, fmt, error_stmt)                 \
+  const DexFile::FieldId* (var) = CheckLoadFieldId(idx, fmt); \
+  if (UNLIKELY((var) == nullptr)) {                           \
+    error_stmt;                                               \
   }
 
-bool DexFileVerifier::Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                             const char* location, std::string* error_msg) {
-  std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
+bool DexFileVerifier::Verify(const DexFile* dex_file,
+                             const uint8_t* begin,
+                             size_t size,
+                             const char* location,
+                             bool verify_checksum,
+                             std::string* error_msg) {
+  std::unique_ptr<DexFileVerifier> verifier(
+      new DexFileVerifier(dex_file, begin, size, location, verify_checksum));
   if (!verifier->Verify()) {
     *error_msg = verifier->FailureReason();
     return false;
@@ -273,8 +278,13 @@
   const uint8_t* non_sum_ptr = reinterpret_cast<const uint8_t*>(header_) + non_sum;
   adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
   if (adler_checksum != header_->checksum_) {
-    ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
-    return false;
+    if (verify_checksum_) {
+      ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+      return false;
+    } else {
+      LOG(WARNING) << StringPrintf(
+          "Ignoring bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+    }
   }
 
   // Check the contents of the header.
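With verify_checksum_ false the verifier above now only warns on a mismatch instead of failing. For reference, a minimal sketch of how the header checksum itself is computed, assuming the documented dex layout where the 8-byte magic and the 4-byte checksum field are excluded from the adler32 (requires zlib):

#include <zlib.h>

#include <cstddef>
#include <cstdint>

uint32_t ComputeDexChecksum(const uint8_t* begin, size_t size) {
  static constexpr size_t kNonSum = 8 + 4;  // Skip magic_ (8 bytes) and checksum_ (4 bytes).
  uint32_t adler = adler32(0L, Z_NULL, 0);
  return adler32(adler, begin + kNonSum, size - kNonSum);
}
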
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 90409db..133e432 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -26,17 +26,31 @@
 
 class DexFileVerifier {
  public:
-  static bool Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                     const char* location, std::string* error_msg);
+  static bool Verify(const DexFile* dex_file,
+                     const uint8_t* begin,
+                     size_t size,
+                     const char* location,
+                     bool verify_checksum,
+                     std::string* error_msg);
 
   const std::string& FailureReason() const {
     return failure_reason_;
   }
 
  private:
-  DexFileVerifier(const DexFile* dex_file, const uint8_t* begin, size_t size, const char* location)
-      : dex_file_(dex_file), begin_(begin), size_(size), location_(location),
-        header_(&dex_file->GetHeader()), ptr_(nullptr), previous_item_(nullptr)  {
+  DexFileVerifier(const DexFile* dex_file,
+                  const uint8_t* begin,
+                  size_t size,
+                  const char* location,
+                  bool verify_checksum)
+      : dex_file_(dex_file),
+        begin_(begin),
+        size_(size),
+        location_(location),
+        verify_checksum_(verify_checksum),
+        header_(&dex_file->GetHeader()),
+        ptr_(nullptr),
+        previous_item_(nullptr)  {
   }
 
   bool Verify();
@@ -176,6 +190,7 @@
   const uint8_t* const begin_;
   const size_t size_;
   const char* const location_;
+  const bool verify_checksum_;
   const DexFile::Header* const header_;
 
   struct OffsetTypeMapEmptyFn {
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 4e53914..71c0ad9 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -122,6 +122,10 @@
 
 class DexFileVerifierTest : public CommonRuntimeTest {
  protected:
+  DexFile* GetDexFile(const uint8_t* dex_bytes, size_t length) {
+    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
+  }
+
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
                           std::function<void(DexFile*)> f,
@@ -130,16 +134,17 @@
     std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(dex_file_base64_content, &length);
     CHECK(dex_bytes != nullptr);
     // Note: `dex_file` will be destroyed before `dex_bytes`.
-    std::unique_ptr<DexFile> dex_file(
-        new DexFile(dex_bytes.get(), length, "tmp", 0, nullptr, nullptr));
+    std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
     f(dex_file.get());
     FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));
 
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     bool success = DexFileVerifier::Verify(dex_file.get(),
                                            dex_file->Begin(),
                                            dex_file->Size(),
                                            location,
+                                           kVerifyChecksum,
                                            &error_msg);
     if (expected_error == nullptr) {
       EXPECT_TRUE(success) << error_msg;
@@ -175,7 +180,7 @@
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, error_msg, &tmp);
+  bool success = DexFile::Open(location, location, true, error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
@@ -1697,4 +1702,45 @@
       " implemented interface with type idx: '0'");
 }
 
+TEST_F(DexFileVerifierTest, Checksum) {
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(kGoodTestDex, &length);
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+
+  // Good checksum: all pass.
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, verify",
+                                      /*verify_checksum*/ true,
+                                      &error_msg));
+
+  // Bad checksum: !verify_checksum passes, verify_checksum fails.
+  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(
+      const_cast<uint8_t*>(dex_file->Begin()));
+  header->checksum_ = 0;
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "bad checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(),
+                                       dex_file->Begin(),
+                                       dex_file->Size(),
+                                       "bad checksum, verify",
+                                       /*verify_checksum*/ true,
+                                       &error_msg));
+  EXPECT_NE(error_msg.find("Bad checksum"), std::string::npos) << error_msg;
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index e160a10..f6ed1f0 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -49,6 +49,8 @@
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -79,6 +81,8 @@
     case k32x: return VRegA_32x();
     case k35c: return VRegA_35c();
     case k3rc: return VRegA_3rc();
+    case k45cc: return VRegA_45cc();
+    case k4rcc: return VRegA_4rcc();
     case k51l: return VRegA_51l();
     default:
       LOG(FATAL) << "Tried to access vA of instruction " << Name() << " which has no A operand.";
@@ -206,6 +210,16 @@
   return InstAA(inst_data);
 }
 
+inline uint4_t Instruction::VRegA_45cc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return InstB(inst_data);  // This is labeled A in the spec.
+}
+
+inline uint8_t Instruction::VRegA_4rcc(uint16_t inst_data) const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return InstAA(inst_data);
+}
+
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
@@ -223,13 +237,14 @@
     case k22t: return true;
     case k22x: return true;
     case k23x: return true;
-    case k25x: return true;
     case k31c: return true;
     case k31i: return true;
     case k31t: return true;
     case k32x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     case k51l: return true;
     default: return false;
   }
@@ -253,13 +268,14 @@
     case k22t: return VRegB_22t();
     case k22x: return VRegB_22x();
     case k23x: return VRegB_23x();
-    case k25x: return VRegB_25x();
     case k31c: return VRegB_31c();
     case k31i: return VRegB_31i();
     case k31t: return VRegB_31t();
     case k32x: return VRegB_32x();
     case k35c: return VRegB_35c();
     case k3rc: return VRegB_3rc();
+    case k45cc: return VRegB_45cc();
+    case k4rcc: return VRegB_4rcc();
     case k51l: return VRegB_51l();
     default:
       LOG(FATAL) << "Tried to access vB of instruction " << Name() << " which has no B operand.";
@@ -331,12 +347,6 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
-// Number of additional registers in this instruction. # of var arg registers = this value + 1.
-inline uint4_t Instruction::VRegB_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return InstB(Fetch16(0));
-}
-
 inline uint32_t Instruction::VRegB_31c() const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
   return Fetch32(1);
@@ -367,6 +377,16 @@
   return Fetch16(1);
 }
 
+inline uint16_t Instruction::VRegB_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(1);
+}
+
+inline uint16_t Instruction::VRegB_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(1);
+}
+
 inline uint64_t Instruction::VRegB_51l() const {
   DCHECK_EQ(FormatOf(Opcode()), k51l);
   uint64_t vB_wide = Fetch32(1) | ((uint64_t) Fetch32(3) << 32);
@@ -383,9 +403,10 @@
     case k22s: return true;
     case k22t: return true;
     case k23x: return true;
-    case k25x: return true;
     case k35c: return true;
     case k3rc: return true;
+    case k45cc: return true;
+    case k4rcc: return true;
     default: return false;
   }
 }
@@ -397,9 +418,10 @@
     case k22s: return VRegC_22s();
     case k22t: return VRegC_22t();
     case k23x: return VRegC_23x();
-    case k25x: return VRegC_25x();
     case k35c: return VRegC_35c();
     case k3rc: return VRegC_3rc();
+    case k45cc: return VRegC_45cc();
+    case k4rcc: return VRegC_4rcc();
     default:
       LOG(FATAL) << "Tried to access vC of instruction " << Name() << " which has no C operand.";
       exit(EXIT_FAILURE);
@@ -431,11 +453,6 @@
   return static_cast<uint8_t>(Fetch16(1) >> 8);
 }
 
-inline uint4_t Instruction::VRegC_25x() const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-  return static_cast<uint4_t>(Fetch16(1) & 0xf);
-}
-
 inline uint4_t Instruction::VRegC_35c() const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
   return static_cast<uint4_t>(Fetch16(2) & 0x0f);
@@ -446,81 +463,53 @@
   return Fetch16(2);
 }
 
-inline bool Instruction::HasVarArgs35c() const {
-  return FormatOf(Opcode()) == k35c;
+inline uint4_t Instruction::VRegC_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return static_cast<uint4_t>(Fetch16(2) & 0x0f);
 }
 
-inline bool Instruction::HasVarArgs25x() const {
-  return FormatOf(Opcode()) == k25x;
+inline uint16_t Instruction::VRegC_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(2);
 }
 
-// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2.
-inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const {
-  DCHECK_EQ(FormatOf(Opcode()), k25x);
-
-  /*
-   * The opcode looks like this:
-   *   op vC, {vD, vE, vF, vG}
-   *
-   *  and vB is the (implicit) register count (0-4) which denotes how far from vD to vG to read.
-   *
-   *  vC is always present, so with "op vC, {}" the register count will be 0 even though vC
-   *  is valid.
-   *
-   *  The exact semantic meanings of vC:vG is up to the instruction using the format.
-   *
-   *  Encoding drawing as a bit stream:
-   *  (Note that each uint16 is little endian, and each register takes up 4 bits)
-   *
-   *       uint16  |||   uint16
-   *   7-0     15-8    7-0   15-8
-   *  |------|-----|||-----|-----|
-   *  |opcode|vB|vG|||vD|vC|vF|vE|
-   *  |------|-----|||-----|-----|
-   */
-  uint16_t reg_list = Fetch16(1);
-  uint4_t count = VRegB_25x();
-  DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")";
-
-  /*
-   * TODO(iam): Change instruction encoding to one of:
-   *
-   * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x)
-   * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x)
-   *
-   * (do this in conjunction with adding verifier support for invoke-lambda)
-   */
-
-  /*
-   * Copy the argument registers into the arg[] array, and
-   * also copy the first argument into vC. (The
-   * DecodedInstruction structure doesn't have separate
-   * fields for {vD, vE, vF, vG}, so there's no need to make
-   * copies of those.) Note that all cases fall-through.
-   */
-  switch (count) {
-    case 4:
-      arg[5] = (Fetch16(0) >> 8) & 0x0f;  // vG
-      FALLTHROUGH_INTENDED;
-    case 3:
-      arg[4] = (reg_list >> 12) & 0x0f;  // vF
-      FALLTHROUGH_INTENDED;
-    case 2:
-      arg[3] = (reg_list >> 8) & 0x0f;  // vE
-      FALLTHROUGH_INTENDED;
-    case 1:
-      arg[2] = (reg_list >> 4) & 0x0f;  // vD
-      FALLTHROUGH_INTENDED;
-    default:  // case 0
-      // The required lambda 'this' is actually a pair, but the pair is implicit.
-      arg[0] = VRegC_25x();  // vC
-      arg[1] = arg[0] + 1;   // vC + 1
-      break;
+//------------------------------------------------------------------------------
+// VRegH
+//------------------------------------------------------------------------------
+inline bool Instruction::HasVRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return true;
+    case k4rcc: return true;
+    default: return false;
   }
 }
 
+inline int32_t Instruction::VRegH() const {
+  switch (FormatOf(Opcode())) {
+    case k45cc: return VRegH_45cc();
+    case k4rcc: return VRegH_4rcc();
+    default:
+      LOG(FATAL) << "Tried to access vH of instruction " << Name() << " which has no H operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
+inline uint16_t Instruction::VRegH_45cc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k45cc);
+  return Fetch16(3);
+}
+
+inline uint16_t Instruction::VRegH_4rcc() const {
+  DCHECK_EQ(FormatOf(Opcode()), k4rcc);
+  return Fetch16(3);
+}
+
+inline bool Instruction::HasVarArgs() const {
+  return (FormatOf(Opcode()) == k35c) || (FormatOf(Opcode()) == k45cc);
+}
+
 inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
-  DCHECK_EQ(FormatOf(Opcode()), k35c);
+  DCHECK(HasVarArgs());
 
   /*
    * Note that the fields mentioned in the spec don't appear in
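The new 45cc/4rcc accessors above decode the invoke-polymorphic style formats, replacing the removed 25x lambda plumbing. A rough standalone sketch of the 45cc layout as implied by those accessors (struct and function names hypothetical; the E/F/G argument nibbles are omitted):

#include <cstdint>

struct Decoded45cc {
  uint8_t arg_count;    // "A" in the spec: number of argument registers.
  uint16_t method_idx;  // BBBB: method reference.
  uint8_t vreg_c;       // C: first argument register (low nibble of unit 2).
  uint16_t proto_idx;   // HHHH: proto reference (invoke-polymorphic).
};

// Layout per code unit, matching the accessors above:
//   unit 0: [ A:4 | G:4 | opcode:8 ]   unit 1: [ BBBB ]
//   unit 2: [ F:4 | E:4 | D:4 | C:4 ]  unit 3: [ HHHH ]
Decoded45cc Decode45cc(const uint16_t* insns) {
  Decoded45cc d;
  d.arg_count = static_cast<uint8_t>(insns[0] >> 12);   // VRegA_45cc: InstB(inst_data).
  d.method_idx = insns[1];                               // VRegB_45cc: Fetch16(1).
  d.vreg_c = static_cast<uint8_t>(insns[2] & 0x0f);      // VRegC_45cc: Fetch16(2) & 0x0f.
  d.proto_idx = insns[3];                                 // VRegH_45cc: Fetch16(3).
  return d;
}
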
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 3f62124..c31d236 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 const char* const Instruction::kInstructionNames[] = {
-#define INSTRUCTION_NAME(o, c, pname, f, r, i, a, v) pname,
+#define INSTRUCTION_NAME(o, c, pname, f, i, a, v) pname,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
 #undef DEX_INSTRUCTION_LIST
@@ -36,7 +36,7 @@
 };
 
 Instruction::Format const Instruction::kInstructionFormats[] = {
-#define INSTRUCTION_FORMAT(o, c, p, format, r, i, a, v) format,
+#define INSTRUCTION_FORMAT(o, c, p, format, i, a, v) format,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FORMAT)
 #undef DEX_INSTRUCTION_LIST
@@ -44,7 +44,7 @@
 };
 
 Instruction::IndexType const Instruction::kInstructionIndexTypes[] = {
-#define INSTRUCTION_INDEX_TYPE(o, c, p, f, r, index, a, v) index,
+#define INSTRUCTION_INDEX_TYPE(o, c, p, f, index, a, v) index,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_INDEX_TYPE)
 #undef DEX_INSTRUCTION_LIST
@@ -52,7 +52,7 @@
 };
 
 int const Instruction::kInstructionFlags[] = {
-#define INSTRUCTION_FLAGS(o, c, p, f, r, i, flags, v) flags,
+#define INSTRUCTION_FLAGS(o, c, p, f, i, flags, v) flags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -60,7 +60,7 @@
 };
 
 int const Instruction::kInstructionVerifyFlags[] = {
-#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, r, i, a, vflags) vflags,
+#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, i, a, vflags) vflags,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_VERIFY_FLAGS)
 #undef DEX_INSTRUCTION_LIST
@@ -68,12 +68,13 @@
 };
 
 int const Instruction::kInstructionSizeInCodeUnits[] = {
-#define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
-    ((opcode == NOP)                        ? -1 : \
-     ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k25x)) ?  2 : \
-     ((format >= k32x) && (format <= k3rc)) ?  3 : \
-      (format == k51l)                      ?  5 : -1),
+#define INSTRUCTION_SIZE(opcode, c, p, format, i, a, v) \
+    (((opcode) == NOP) ? -1 : \
+     (((format) >= k10x) && ((format) <= k10t)) ?  1 : \
+     (((format) >= k20t) && ((format) <= k22c)) ?  2 : \
+     (((format) >= k32x) && ((format) <= k3rc)) ?  3 : \
+     (((format) >= k45cc) && ((format) <= k4rcc)) ? 4 : \
+      ((format) == k51l) ?  5 : -1),
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
 #undef DEX_INSTRUCTION_LIST
@@ -241,14 +242,6 @@
             break;
           }
           FALLTHROUGH_INTENDED;
-        case CREATE_LAMBDA:
-          if (file != nullptr) {
-            uint32_t method_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyMethod(method_idx, *file, true)
-               << " // method@" << method_idx;
-            break;
-          }
-          FALLTHROUGH_INTENDED;
         default:
           os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c());
           break;
@@ -329,26 +322,6 @@
       }
       break;
     }
-    case k25x: {
-      if (Opcode() == INVOKE_LAMBDA) {
-        uint32_t arg[kMaxVarArgRegs25x];
-        GetAllArgs25x(arg);
-        const size_t num_extra_var_args = VRegB_25x();
-        DCHECK_LE(num_extra_var_args + 2, arraysize(arg));
-
-        // invoke-lambda vC, {vD, vE, vF, vG}
-        os << opcode << " v" << arg[0] << ", {";
-        for (size_t i = 0; i < num_extra_var_args; ++i) {
-          if (i != 0) {
-            os << ", ";
-          }
-          os << "v" << arg[i + 2];  // Don't print the pair of vC registers. Pair is implicit.
-        }
-        os << "}";
-        break;
-      }
-      FALLTHROUGH_INTENDED;
-    }
     case k32x:  os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break;
     case k30t:  os << StringPrintf("%s %+d", opcode, VRegA_30t()); break;
     case k31t:  os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break;
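For context on the table changes above: every table in this file is produced by expanding DEX_INSTRUCTION_LIST with a selector macro, so dropping the unused 'r' column means each selector loses one parameter (8 down to 7). A self-contained toy version of the same X-macro pattern, with invented names (MY_LIST and its two entries are illustration only, not the real opcode list):

  #include <cstdio>

  // V(opcode, cname, name, format, index, flags, verifier_flags)
  #define MY_LIST(V) \
    V(0x00, NOP,  "nop",  k10x, kIndexNone, kContinue, kVerifyNone) \
    V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, kVerifyRegA)

  enum Format { k10x, k12x };
  enum IndexType { kIndexNone };
  enum { kContinue = 1, kVerifyNone = 0, kVerifyRegA = 2 };

  // Each selector keeps exactly one column and ignores the rest, so the
  // parallel arrays stay in sync with a single source of truth.
  static const char* const kNames[] = {
  #define PICK_NAME(o, c, pname, f, i, a, v) pname,
    MY_LIST(PICK_NAME)
  #undef PICK_NAME
  };

  static const Format kFormats[] = {
  #define PICK_FORMAT(o, c, p, format, i, a, v) format,
    MY_LIST(PICK_FORMAT)
  #undef PICK_FORMAT
  };

  int main() {
    std::printf("%s uses format %d\n", kNames[1], static_cast<int>(kFormats[1]));
    return 0;  // prints: move uses format 1
  }
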
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 035230e..f437fde 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to avoid generate-operator-out.py from processing.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = opcode,
+#define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
@@ -105,7 +105,6 @@
     k22t,  // op vA, vB, +CCCC
     k22s,  // op vA, vB, #+CCCC
     k22c,  // op vA, vB, thing@CCCC
-    k25x,  // op vC, {vD, vE, vF, vG} (B: count)
     k32x,  // op vAAAA, vBBBB
     k30t,  // op +AAAAAAAA
     k31t,  // op vAA, +BBBBBBBB
@@ -113,6 +112,15 @@
     k31c,  // op vAA, thing@BBBBBBBB
     k35c,  // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG)
     k3rc,  // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
+
+    // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH (A: count)
+    // format: AG op BBBB FEDC HHHH
+    k45cc,
+
+    // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count)
+    // format: AA op BBBB CCCC HHHH
+    k4rcc,
+
     k51l,  // op vAA, #+BBBBBBBBBBBBBBBB
   };
 
@@ -180,12 +188,9 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
-    kVerifyRegCString         = 0x800000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
-  static constexpr uint32_t kMaxVarArgRegs25x = 6;  // lambdas are 2 registers.
-  static constexpr uint32_t kLambdaVirtualRegisterWidth = 2;
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -221,7 +226,7 @@
 
   // Returns a pointer to the instruction after this 2xx instruction in the stream.
   const Instruction* Next_2xx() const {
-    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x);
+    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c);
     return RelativeAt(2);
   }
 
@@ -231,6 +236,12 @@
     return RelativeAt(3);
   }
 
+  // Returns a pointer to the instruction after this 4xx instruction in the stream.
+  const Instruction* Next_4xx() const {
+    DCHECK(FormatOf(Opcode()) >= k45cc && FormatOf(Opcode()) <= k4rcc);
+    return RelativeAt(4);
+  }
+
   // Returns a pointer to the instruction after this 51l instruction in the stream.
   const Instruction* Next_51l() const {
     DCHECK(FormatOf(Opcode()) == k51l);
@@ -317,6 +328,12 @@
   uint8_t VRegA_51l() const {
     return VRegA_51l(Fetch16(0));
   }
+  uint4_t VRegA_45cc() const {
+    return VRegA_45cc(Fetch16(0));
+  }
+  uint8_t VRegA_4rcc() const {
+    return VRegA_4rcc(Fetch16(0));
+  }
 
   // The following methods return the vA operand for various instruction formats. The "inst_data"
   // parameter holds the first 16 bits of instruction which the returned value is decoded from.
@@ -341,6 +358,8 @@
   uint4_t VRegA_35c(uint16_t inst_data) const;
   uint8_t VRegA_3rc(uint16_t inst_data) const;
   uint8_t VRegA_51l(uint16_t inst_data) const;
+  uint4_t VRegA_45cc(uint16_t inst_data) const;
+  uint8_t VRegA_4rcc(uint16_t inst_data) const;
 
   // VRegB
   bool HasVRegB() const;
@@ -371,7 +390,6 @@
   }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
-  uint4_t VRegB_25x() const;
   uint32_t VRegB_31c() const;
   int32_t VRegB_31i() const;
   int32_t VRegB_31t() const;
@@ -379,6 +397,8 @@
   uint16_t VRegB_35c() const;
   uint16_t VRegB_3rc() const;
   uint64_t VRegB_51l() const;  // vB_wide
+  uint16_t VRegB_45cc() const;
+  uint16_t VRegB_4rcc() const;
 
   // The following methods return the vB operand for all instruction formats where it is encoded in
   // the first 16 bits of instruction. The "inst_data" parameter holds these 16 bits. The returned
@@ -398,20 +418,23 @@
   int16_t VRegC_22s() const;
   int16_t VRegC_22t() const;
   uint8_t VRegC_23x() const;
-  uint4_t VRegC_25x() const;
   uint4_t VRegC_35c() const;
   uint16_t VRegC_3rc() const;
+  uint4_t VRegC_45cc() const;
+  uint16_t VRegC_4rcc() const;
+
+  // VRegH
+  bool HasVRegH() const;
+  int32_t VRegH() const;
+  uint16_t VRegH_45cc() const;
+  uint16_t VRegH_4rcc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  bool HasVarArgs35c() const;
-  bool HasVarArgs25x() const;
-
-  // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format.
+  bool HasVarArgs() const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
-  void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -539,7 +563,7 @@
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyExtraFlags() const {
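To make the new four-code-unit layouts concrete: per the k45cc comment above, the fourth unit always carries the prototype index, which is why VRegH_45cc() and VRegH_4rcc() both simply read Fetch16(3). A standalone sketch of splitting a k45cc instruction into its fields under that "AG op BBBB FEDC HHHH" layout (Fields45cc/Split45cc are hypothetical names, and the sample code units are made up):

  #include <cstdint>
  #include <cstdio>

  struct Fields45cc {
    uint8_t  opcode;      // low byte of unit 0
    uint8_t  arg_count;   // A: high nibble of unit 0, at most 5
    uint16_t method_idx;  // BBBB: unit 1
    uint16_t reg_list;    // FEDC: unit 2 (vC..vF as nibbles; vG sits in unit 0)
    uint16_t proto_idx;   // HHHH: unit 3, i.e. what VRegH_45cc() returns
  };

  Fields45cc Split45cc(const uint16_t units[4]) {
    return Fields45cc{static_cast<uint8_t>(units[0] & 0xff),
                      static_cast<uint8_t>((units[0] >> 12) & 0x0f),
                      units[1], units[2], units[3]};
  }

  int main() {
    const uint16_t units[4] = {0x2000, 0x0012, 0x4321, 0x0007};  // made-up values
    Fields45cc f = Split45cc(units);
    std::printf("meth@%d proto@%d, %d args\n", f.method_idx, f.proto_idx, f.arg_count);
    return 0;  // prints: meth@18 proto@7, 2 args
  }
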
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 9d7e0c4..e974932 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -17,264 +17,265 @@
 #ifndef ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 #define ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 
+// V(opcode, instruction_code, name, format, index, flags, verifier_flags);
 #define DEX_INSTRUCTION_LIST(V) \
-  V(0x00, NOP, "nop", k10x, false, kIndexNone, kContinue, kVerifyNone) \
-  V(0x01, MOVE, "move", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x02, MOVE_FROM16, "move/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x03, MOVE_16, "move/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x04, MOVE_WIDE, "move-wide", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x07, MOVE_OBJECT, "move-object", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x0A, MOVE_RESULT, "move-result", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, true, kIndexNone, kContinue, kVerifyRegAWide) \
-  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, true, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0E, RETURN_VOID, "return-void", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x0F, RETURN, "return", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x10, RETURN_WIDE, "return-wide", k11x, false, kIndexNone, kReturn, kVerifyRegAWide) \
-  V(0x11, RETURN_OBJECT, "return-object", k11x, false, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x12, CONST_4, "const/4", k11n, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x13, CONST_16, "const/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x14, CONST, "const", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x15, CONST_HIGH16, "const/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x18, CONST_WIDE, "const-wide", k51l, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, true, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x1A, CONST_STRING, "const-string", k21c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, true, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1C, CONST_CLASS, "const-class", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1F, CHECK_CAST, "check-cast", k21c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x20, INSTANCE_OF, "instance-of", k22c, true, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0x21, ARRAY_LENGTH, "array-length", k12x, true, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0x22, NEW_INSTANCE, "new-instance", k21c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
-  V(0x23, NEW_ARRAY, "new-array", k22c, true, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
-  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
-  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, false, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
-  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, false, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
-  V(0x27, THROW, "throw", k11x, false, kIndexNone, kThrow, kVerifyRegA) \
-  V(0x28, GOTO, "goto", k10t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x29, GOTO_16, "goto/16", k20t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2A, GOTO_32, "goto/32", k30t, false, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, false, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x31, CMP_LONG, "cmp-long", k23x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x32, IF_EQ, "if-eq", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x33, IF_NE, "if-ne", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x34, IF_LT, "if-lt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x35, IF_GE, "if-ge", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x36, IF_GT, "if-gt", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x37, IF_LE, "if-le", k22t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x38, IF_EQZ, "if-eqz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x39, IF_NEZ, "if-nez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3A, IF_LTZ, "if-ltz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3B, IF_GEZ, "if-gez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3C, IF_GTZ, "if-gtz", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3D, IF_LEZ, "if-lez", k21t, false, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3E, UNUSED_3E, "unused-3e", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x3F, UNUSED_3F, "unused-3f", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x40, UNUSED_40, "unused-40", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x41, UNUSED_41, "unused-41", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x42, UNUSED_42, "unused-42", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x43, UNUSED_43, "unused-43", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x44, AGET, "aget", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x45, AGET_WIDE, "aget-wide", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x46, AGET_OBJECT, "aget-object", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x48, AGET_BYTE, "aget-byte", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x49, AGET_CHAR, "aget-char", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4A, AGET_SHORT, "aget-short", k23x, true, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4B, APUT, "aput", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4C, APUT_WIDE, "aput-wide", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x4D, APUT_OBJECT, "aput-object", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4F, APUT_BYTE, "aput-byte", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x50, APUT_CHAR, "aput-char", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x51, APUT_SHORT, "aput-short", k23x, false, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x52, IGET, "iget", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x53, IGET_WIDE, "iget-wide", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x54, IGET_OBJECT, "iget-object", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x56, IGET_BYTE, "iget-byte", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x57, IGET_CHAR, "iget-char", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x58, IGET_SHORT, "iget-short", k22c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x59, IPUT, "iput", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5A, IPUT_WIDE, "iput-wide", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x5B, IPUT_OBJECT, "iput-object", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5D, IPUT_BYTE, "iput-byte", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5E, IPUT_CHAR, "iput-char", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5F, IPUT_SHORT, "iput-short", k22c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x60, SGET, "sget", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x61, SGET_WIDE, "sget-wide", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x62, SGET_OBJECT, "sget-object", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x64, SGET_BYTE, "sget-byte", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x65, SGET_CHAR, "sget-char", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x66, SGET_SHORT, "sget-short", k21c, true, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x67, SPUT, "sput", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, false, kIndexNone, kReturn, kVerifyNone) \
-  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x79, UNUSED_79, "unused-79", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7A, UNUSED_7A, "unused-7a", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0x7B, NEG_INT, "neg-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7C, NOT_INT, "not-int", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7D, NEG_LONG, "neg-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7E, NOT_LONG, "not-long", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7F, NEG_FLOAT, "neg-float", k12x, true, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x80, NEG_DOUBLE, "neg-double", k12x, true, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x81, INT_TO_LONG, "int-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x84, LONG_TO_INT, "long-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, true, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x90, ADD_INT, "add-int", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x91, SUB_INT, "sub-int", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x92, MUL_INT, "mul-int", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x93, DIV_INT, "div-int", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x94, REM_INT, "rem-int", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x95, AND_INT, "and-int", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x96, OR_INT, "or-int", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x97, XOR_INT, "xor-int", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x98, SHL_INT, "shl-int", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x99, SHR_INT, "shr-int", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9A, USHR_INT, "ushr-int", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9B, ADD_LONG, "add-long", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9C, SUB_LONG, "sub-long", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9D, MUL_LONG, "mul-long", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9E, DIV_LONG, "div-long", k23x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9F, REM_LONG, "rem-long", k23x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA0, AND_LONG, "and-long", k23x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA1, OR_LONG, "or-long", k23x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA2, XOR_LONG, "xor-long", k23x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA3, SHL_LONG, "shl-long", k23x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA4, SHR_LONG, "shr-long", k23x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA5, USHR_LONG, "ushr-long", k23x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA6, ADD_FLOAT, "add-float", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA7, SUB_FLOAT, "sub-float", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA8, MUL_FLOAT, "mul-float", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA9, DIV_FLOAT, "div-float", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAA, REM_FLOAT, "rem-float", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAB, ADD_DOUBLE, "add-double", k23x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAC, SUB_DOUBLE, "sub-double", k23x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAD, MUL_DOUBLE, "mul-double", k23x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAE, DIV_DOUBLE, "div-double", k23x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAF, REM_DOUBLE, "rem-double", k23x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
-  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
-  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
-  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
-  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
-  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
-  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, true, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, true, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, true, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, true, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, true, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, true, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, true, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, true, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, true, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, true, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, true, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, true, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD1, RSUB_INT, "rsub-int", k22s, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, true, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, true, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, true, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, true, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, true, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, true, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, true, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
-  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, false, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
-  V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \
-  /* TODO(iam): get rid of the unused 'false' column */ \
-  V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
-  V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \
-  V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
-  V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFB, UNUSED_FB, "unused-fb", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFC, UNUSED_FC, "unused-fc", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFD, UNUSED_FD, "unused-fd", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFE, UNUSED_FE, "unused-fe", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xFF, UNUSED_FF, "unused-ff", k10x, false, kIndexUnknown, 0, kVerifyError)
+  V(0x00, NOP, "nop", k10x, kIndexNone, kContinue, kVerifyNone) \
+  V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x02, MOVE_FROM16, "move/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x03, MOVE_16, "move/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x04, MOVE_WIDE, "move-wide", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x07, MOVE_OBJECT, "move-object", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x0A, MOVE_RESULT, "move-result", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, kIndexNone, kContinue, kVerifyRegAWide) \
+  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, kIndexNone, kContinue, kVerifyRegA) \
+  V(0x0E, RETURN_VOID, "return-void", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x0F, RETURN, "return", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x10, RETURN_WIDE, "return-wide", k11x, kIndexNone, kReturn, kVerifyRegAWide) \
+  V(0x11, RETURN_OBJECT, "return-object", k11x, kIndexNone, kReturn, kVerifyRegA) \
+  V(0x12, CONST_4, "const/4", k11n, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x13, CONST_16, "const/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x14, CONST, "const", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x15, CONST_HIGH16, "const/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x18, CONST_WIDE, "const-wide", k51l, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x1A, CONST_STRING, "const-string", k21c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
+  V(0x1C, CONST_CLASS, "const-class", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1F, CHECK_CAST, "check-cast", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
+  V(0x20, INSTANCE_OF, "instance-of", k22c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
+  V(0x21, ARRAY_LENGTH, "array-length", k12x, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
+  V(0x22, NEW_INSTANCE, "new-instance", k21c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
+  V(0x23, NEW_ARRAY, "new-array", k22c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
+  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
+  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
+  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
+  V(0x27, THROW, "throw", k11x, kIndexNone, kThrow, kVerifyRegA) \
+  V(0x28, GOTO, "goto", k10t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x29, GOTO_16, "goto/16", k20t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2A, GOTO_32, "goto/32", k30t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
+  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x31, CMP_LONG, "cmp-long", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x32, IF_EQ, "if-eq", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x33, IF_NE, "if-ne", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x34, IF_LT, "if-lt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x35, IF_GE, "if-ge", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x36, IF_GT, "if-gt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x37, IF_LE, "if-le", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x38, IF_EQZ, "if-eqz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x39, IF_NEZ, "if-nez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3A, IF_LTZ, "if-ltz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3B, IF_GEZ, "if-gez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3C, IF_GTZ, "if-gtz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3D, IF_LEZ, "if-lez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3E, UNUSED_3E, "unused-3e", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x3F, UNUSED_3F, "unused-3f", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x40, UNUSED_40, "unused-40", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x41, UNUSED_41, "unused-41", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x42, UNUSED_42, "unused-42", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x43, UNUSED_43, "unused-43", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x44, AGET, "aget", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x45, AGET_WIDE, "aget-wide", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x46, AGET_OBJECT, "aget-object", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x48, AGET_BYTE, "aget-byte", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x49, AGET_CHAR, "aget-char", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4A, AGET_SHORT, "aget-short", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4B, APUT, "aput", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4C, APUT_WIDE, "aput-wide", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x4D, APUT_OBJECT, "aput-object", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4F, APUT_BYTE, "aput-byte", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x50, APUT_CHAR, "aput-char", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x51, APUT_SHORT, "aput-short", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x52, IGET, "iget", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x53, IGET_WIDE, "iget-wide", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x54, IGET_OBJECT, "iget-object", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x56, IGET_BYTE, "iget-byte", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x57, IGET_CHAR, "iget-char", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x58, IGET_SHORT, "iget-short", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x59, IPUT, "iput", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5A, IPUT_WIDE, "iput-wide", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x5B, IPUT_OBJECT, "iput-object", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5D, IPUT_BYTE, "iput-byte", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5E, IPUT_CHAR, "iput-char", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5F, IPUT_SHORT, "iput-short", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x60, SGET, "sget", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x61, SGET_WIDE, "sget-wide", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x62, SGET_OBJECT, "sget-object", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x64, SGET_BYTE, "sget-byte", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x65, SGET_CHAR, "sget-char", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x66, SGET_SHORT, "sget-short", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x67, SPUT, "sput", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x69, SPUT_OBJECT, "sput-object", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6B, SPUT_BYTE, "sput-byte", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6C, SPUT_CHAR, "sput-char", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6D, SPUT_SHORT, "sput-short", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x71, INVOKE_STATIC, "invoke-static", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, kIndexNone, kReturn, kVerifyNone) \
+  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x79, UNUSED_79, "unused-79", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7A, UNUSED_7A, "unused-7a", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0x7B, NEG_INT, "neg-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7C, NOT_INT, "not-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x7D, NEG_LONG, "neg-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7E, NOT_LONG, "not-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7F, NEG_FLOAT, "neg-float", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
+  V(0x80, NEG_DOUBLE, "neg-double", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x81, INT_TO_LONG, "int-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x84, LONG_TO_INT, "long-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x90, ADD_INT, "add-int", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x91, SUB_INT, "sub-int", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x92, MUL_INT, "mul-int", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x93, DIV_INT, "div-int", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x94, REM_INT, "rem-int", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x95, AND_INT, "and-int", k23x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x96, OR_INT, "or-int", k23x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x97, XOR_INT, "xor-int", k23x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x98, SHL_INT, "shl-int", k23x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x99, SHR_INT, "shr-int", k23x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9A, USHR_INT, "ushr-int", k23x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9B, ADD_LONG, "add-long", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9C, SUB_LONG, "sub-long", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9D, MUL_LONG, "mul-long", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9E, DIV_LONG, "div-long", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9F, REM_LONG, "rem-long", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA0, AND_LONG, "and-long", k23x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA1, OR_LONG, "or-long", k23x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA2, XOR_LONG, "xor-long", k23x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA3, SHL_LONG, "shl-long", k23x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA4, SHR_LONG, "shr-long", k23x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA5, USHR_LONG, "ushr-long", k23x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA6, ADD_FLOAT, "add-float", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA7, SUB_FLOAT, "sub-float", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA8, MUL_FLOAT, "mul-float", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA9, DIV_FLOAT, "div-float", k23x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAA, REM_FLOAT, "rem-float", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAB, ADD_DOUBLE, "add-double", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAC, SUB_DOUBLE, "sub-double", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAD, MUL_DOUBLE, "mul-double", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAE, DIV_DOUBLE, "div-double", k23x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAF, REM_DOUBLE, "rem-double", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
+  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
+  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
+  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
+  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
+  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
+  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD1, RSUB_INT, "rsub-int", k22s, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
+  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF3, UNUSED_F3, "unused-f3", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF4, UNUSED_F4, "unused-f4", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF5, UNUSED_F5, "unused-f5", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF6, UNUSED_F6, "unused-f6", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
+  /* TODO(narayan): The following two entries are placeholders. */ \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexUnknown, 0, kVerifyError) \
+  V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
+  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError)
 
 #define DEX_INSTRUCTION_FORMAT_LIST(V) \
   V(k10x) \
@@ -293,7 +294,6 @@
   V(k22t) \
   V(k22s) \
   V(k22c) \
-  V(k25x) \
   V(k32x) \
   V(k30t) \
   V(k31t) \
diff --git a/runtime/dex_instruction_test.cc b/runtime/dex_instruction_test.cc
index 671ac0e..95e4181 100644
--- a/runtime/dex_instruction_test.cc
+++ b/runtime/dex_instruction_test.cc
@@ -28,4 +28,107 @@
   EXPECT_EQ(Instruction::kVerifyNone, Instruction::VerifyFlagsOf(nop));
 }
 
+static void Build45cc(uint8_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C - G = argument registers
+  // H = proto_idx
+  //
+  // op = 0xFA
+  //
+  // format:
+  // AG op BBBB FEDC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 12);
+  out[0] |= 0x00FA;
+
+  out[1] = method_idx;
+  out[2] = arg_regs;
+  out[3] = proto_idx;
+}
+
+static void Build4rcc(uint16_t num_args, uint16_t method_idx, uint16_t proto_idx,
+                      uint16_t arg_regs_start, uint16_t* out) {
+  // A = num argument registers
+  // B = method_idx
+  // C = first argument register
+  // H = proto_idx
+  //
+  // op = 0xFB
+  //
+  // format:
+  // AA op BBBB CCCC HHHH
+  out[0] = 0;
+  out[0] |= (num_args << 8);
+  out[0] |= 0x00FB;
+
+  out[1] = method_idx;
+  out[2] = arg_regs_start;
+  out[3] = proto_idx;
+}
+
+TEST(Instruction, PropertiesOf45cc) {
+  uint16_t instruction[4];
+  Build45cc(4u /* num_vregs */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_45cc());
+  ASSERT_EQ(4u, ins->VRegA_45cc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_45cc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xe, ins->VRegC());
+  ASSERT_EQ(0xe, ins->VRegC_45cc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_45cc());
+
+  ASSERT_TRUE(ins->HasVarArgs());
+
+  uint32_t arg_regs[Instruction::kMaxVarArgRegs];
+  ins->GetVarArgs(arg_regs);
+  ASSERT_EQ(0xeu, arg_regs[0]);
+  ASSERT_EQ(0xfu, arg_regs[1]);
+  ASSERT_EQ(0xau, arg_regs[2]);
+  ASSERT_EQ(0xcu, arg_regs[3]);
+}
+
+TEST(Instruction, PropertiesOf4rcc) {
+  uint16_t instruction[4];
+  Build4rcc(4u /* num_vregs */, 16u /* method_idx */, 32u /* proto_idx */,
+            0xcafe /* arg_regs */, instruction);
+
+  const Instruction* ins = Instruction::At(instruction);
+  ASSERT_EQ(4u, ins->SizeInCodeUnits());
+
+  ASSERT_TRUE(ins->HasVRegA());
+  ASSERT_EQ(4, ins->VRegA());
+  ASSERT_EQ(4u, ins->VRegA_4rcc());
+  ASSERT_EQ(4u, ins->VRegA_4rcc(instruction[0]));
+
+  ASSERT_TRUE(ins->HasVRegB());
+  ASSERT_EQ(16, ins->VRegB());
+  ASSERT_EQ(16u, ins->VRegB_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC());
+  ASSERT_EQ(0xcafe, ins->VRegC_4rcc());
+
+  ASSERT_TRUE(ins->HasVRegH());
+  ASSERT_EQ(32, ins->VRegH());
+  ASSERT_EQ(32, ins->VRegH_4rcc());
+
+  ASSERT_FALSE(ins->HasVarArgs());
+}
+
 }  // namespace art
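The PropertiesOf45cc test above builds its code units by hand, so the bit layout is easy to miss. As a minimal standalone sketch (not ART code; DecodeArgRegs45cc and the main harness are illustrative names, assumed here only for demonstration), this shows how the four argument registers packed FEDC into the third code unit of the AG|op BBBB FEDC HHHH encoding can be read back; with the 0xcafe value the test uses, it yields registers e, f, a, c, matching the assertions above.

#include <cstdint>
#include <cstdio>

// Illustrative helper: extract the FEDC-packed argument registers from a
// 45cc instruction's third code unit. Argument i sits in bits [4*i, 4*i+3];
// the fifth register (G) lives in the first code unit and is not handled here.
static void DecodeArgRegs45cc(const uint16_t* insn, uint32_t num_args, uint32_t* regs_out) {
  for (uint32_t i = 0; i < num_args && i < 4; ++i) {
    regs_out[i] = (insn[2] >> (4 * i)) & 0xF;
  }
}

int main() {
  // Same code units Build45cc(4, 16, 32, 0xcafe, ...) produces above.
  const uint16_t insn[4] = {0x40FA, 16, 0xcafe, 32};
  uint32_t regs[4] = {0, 0, 0, 0};
  DecodeArgRegs45cc(insn, 4, regs);
  std::printf("%x %x %x %x\n", regs[0], regs[1], regs[2], regs[3]);  // prints: e f a c
  return 0;
}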
diff --git a/runtime/dex_instruction_visitor.h b/runtime/dex_instruction_visitor.h
index 795b95b..42af6a9 100644
--- a/runtime/dex_instruction_visitor.h
+++ b/runtime/dex_instruction_visitor.h
@@ -32,7 +32,7 @@
     while (i < size_in_code_units) {
       const Instruction* inst = Instruction::At(&code[i]);
       switch (inst->Opcode()) {
-#define INSTRUCTION_CASE(o, cname, p, f, r, i, a, v)  \
+#define INSTRUCTION_CASE(o, cname, p, f, i, a, v)  \
         case Instruction::cname: {                    \
           derived->Do_ ## cname(inst);                \
           break;                                      \
@@ -50,7 +50,7 @@
 
  private:
   // Specific handlers for each instruction.
-#define INSTRUCTION_VISITOR(o, cname, p, f, r, i, a, v)    \
+#define INSTRUCTION_VISITOR(o, cname, p, f, i, a, v)    \
   void Do_ ## cname(const Instruction* inst) {             \
     T* derived = static_cast<T*>(this);                    \
     derived->Do_Default(inst);                             \
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 916ca29..08fec91 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,8 @@
 
 #include "entrypoint_utils.h"
 
-#include "art_method.h"
+#include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -39,45 +40,84 @@
 
 namespace art {
 
-template <bool kResolve = true>
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
                                     const InlineInfo& inline_info,
                                     const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // This method is being used by artQuickResolutionTrampoline, before it sets up
+  // the passed parameters in a GC friendly way. Therefore we must never be
+  // suspended while executing it.
+  ScopedAssertNoThreadSuspension sants(Thread::Current(), __FUNCTION__);
+
   uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
   InvokeType invoke_type = static_cast<InvokeType>(
         inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth));
-  ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
-  if (!caller->IsRuntimeMethod()) {
-    return caller;
-  }
-  if (!kResolve) {
-    return nullptr;
+  ArtMethod* inlined_method = outer_method->GetDexCacheResolvedMethod(method_index,
+                                                                      kRuntimePointerSize);
+  if (!inlined_method->IsRuntimeMethod()) {
+    return inlined_method;
   }
 
-  // The method in the dex cache can be the runtime method responsible for invoking
+  // The method in the dex cache is the runtime method responsible for invoking
   // the stub that will then update the dex cache. Therefore, we need to do the
   // resolution ourselves.
 
-  // We first find the class loader of our caller. If it is the outer method, we can directly
-  // use its class loader. Otherwise, we also need to resolve our caller.
-  StackHandleScope<2> hs(Thread::Current());
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache()));
-  if (inlining_depth == 0) {
-    class_loader.Assign(outer_method->GetClassLoader());
+  // We first find the dex cache of our caller. If it is the outer method, we can directly
+  // use its dex cache. Otherwise, we also need to resolve our caller.
+  ArtMethod* caller = outer_method;
+  if (inlining_depth != 0) {
+    caller = GetResolvedMethod(outer_method,
+                               inline_info,
+                               encoding,
+                               inlining_depth - 1);
+  }
+  DCHECK_EQ(caller->GetDexCache(), outer_method->GetDexCache())
+      << "Compiler only supports inlining calls within the same dex cache";
+  const DexFile* dex_file = outer_method->GetDexFile();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(method_index);
+
+  if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) {
+    // "charAt" special case. It is the only non-leaf method we inline across dex files.
+    if (kIsDebugBuild) {
+      const char* name = dex_file->StringDataByIdx(method_id.name_idx_);
+      DCHECK_EQ(std::string(name), "charAt");
+      DCHECK_EQ(std::string(dex_file->GetMethodShorty(method_id)), "CI")
+          << std::string(dex_file->GetMethodShorty(method_id));
+      DCHECK_EQ(std::string(dex_file->StringByTypeIdx(method_id.class_idx_)), "Ljava/lang/String;")
+          << std::string(dex_file->StringByTypeIdx(method_id.class_idx_));
+    }
+    mirror::Class* cls =
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangString);
+    // Update the dex cache for future lookups.
+    caller->GetDexCache()->SetResolvedType(method_id.class_idx_, cls);
+    inlined_method = cls->FindVirtualMethod("charAt", "(I)C", kRuntimePointerSize);
   } else {
-    caller = GetResolvedMethod<kResolve>(outer_method,
-                                         inline_info,
-                                         encoding,
-                                         inlining_depth - 1);
-    class_loader.Assign(caller->GetClassLoader());
+    mirror::Class* klass = caller->GetDexCache()->GetResolvedType(method_id.class_idx_);
+    DCHECK_EQ(klass->GetDexCache(), caller->GetDexCache())
+        << "Compiler only supports inlining calls within the same dex cache";
+    switch (invoke_type) {
+      case kDirect:
+      case kStatic:
+        inlined_method =
+            klass->FindDirectMethod(klass->GetDexCache(), method_index, kRuntimePointerSize);
+        break;
+      case kSuper:
+      case kVirtual:
+        inlined_method =
+            klass->FindVirtualMethod(klass->GetDexCache(), method_index, kRuntimePointerSize);
+        break;
+      default:
+        LOG(FATAL) << "Unimplemented inlined invocation type: " << invoke_type;
+        UNREACHABLE();
+    }
   }
 
-  return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
-      *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
+  // Update the dex cache for future lookups. Note that for static methods, this is safe
+  // when the class is being initialized, as the entrypoint for the ArtMethod is at
+  // this point still the resolution trampoline.
+  outer_method->SetDexCacheResolvedMethod(method_index, inlined_method, kRuntimePointerSize);
+  return inlined_method;
 }
 
 inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
@@ -92,7 +132,7 @@
                                        ArtMethod* method,
                                        Thread* self, bool* slow_path) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {
     klass = class_linker->ResolveType(type_idx, method);
@@ -110,6 +150,11 @@
       *slow_path = true;
       return nullptr;  // Failure
     }
+    if (UNLIKELY(klass->IsClassClass())) {
+      ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", PrettyDescriptor(klass).c_str());
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
@@ -237,7 +282,7 @@
     return nullptr;  // Failure
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = class_linker->ResolveType(type_idx, method);
@@ -343,7 +388,7 @@
     //
     // In particular, don't assume the dex instruction already correctly knows if the
     // real field is static or not. The resolution must not be aware of this.
-    ArtMethod* method = referrer->GetInterfaceMethodIfProxy(sizeof(void*));
+    ArtMethod* method = referrer->GetInterfaceMethodIfProxy(kRuntimePointerSize);
 
     StackHandleScope<2> hs(self);
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(method->GetDexCache()));
@@ -448,23 +493,10 @@
                      : ClassLinker::kNoICCECheckForCache;
     resolved_method = class_linker->ResolveMethod<resolve_mode>(self, method_idx, referrer, type);
   }
+  // Resolution and access check.
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
-                 resolved_method->IsConstructor())) {
-      // Hack for String init:
-      //
-      // We assume that the input of String.<init> in verified code is always
-      // an unitialized reference. If it is a null constant, it must have been
-      // optimized out by the compiler. Do not throw NullPointerException.
-    } else {
-      // Maintain interpreter-like semantics where NullPointerException is thrown
-      // after potential NoSuchMethodError from class linker.
-      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-      return nullptr;  // Failure.
-    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     bool can_access_resolved_method =
@@ -482,6 +514,22 @@
       return nullptr;  // Failure.
     }
   }
+  // Next, null pointer check.
+  if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an uninitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
+  }
   switch (type) {
     case kStatic:
     case kDirect:
@@ -559,8 +607,8 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
-      size_t pointer_size = class_linker->GetImagePointerSize();
+      uint32_t imt_index = resolved_method->GetImtIndex();
+      PointerSize pointer_size = class_linker->GetImagePointerSize();
       ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
           Get(imt_index, pointer_size);
       if (!imt_method->IsRuntimeMethod()) {
@@ -614,7 +662,8 @@
 inline ArtField* FindFieldFast(uint32_t field_idx, ArtMethod* referrer, FindFieldType type,
                                size_t expected_size) {
   ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx, sizeof(void*));
+      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx,
+                                                                     kRuntimePointerSize);
   if (UNLIKELY(resolved_field == nullptr)) {
     return nullptr;
   }
@@ -669,7 +718,7 @@
   }
   mirror::Class* referring_class = referrer->GetDeclaringClass();
   ArtMethod* resolved_method =
-      referring_class->GetDexCache()->GetResolvedMethod(method_idx, sizeof(void*));
+      referring_class->GetDexCache()->GetResolvedMethod(method_idx, kRuntimePointerSize);
   if (UNLIKELY(resolved_method == nullptr)) {
     return nullptr;
   }
@@ -688,7 +737,8 @@
     }
   }
   if (type == kInterface) {  // Most common form of slow path dispatch.
-    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method, sizeof(void*));
+    return this_object->GetClass()->FindVirtualMethodForInterface(resolved_method,
+                                                                  kRuntimePointerSize);
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
@@ -711,15 +761,15 @@
         // The super class does not have the method.
         return nullptr;
       }
-      return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
+      return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), kRuntimePointerSize);
     } else {
       return method_reference_class->FindVirtualMethodForInterfaceSuper(
-          resolved_method, sizeof(void*));
+          resolved_method, kRuntimePointerSize);
     }
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+        resolved_method->GetMethodIndex(), kRuntimePointerSize);
   }
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 197caa1..fd1c02f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -48,7 +49,7 @@
     return nullptr;  // Failure
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
   mirror::Class* klass = referrer->GetDexCacheResolvedType<false>(type_idx, pointer_size);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = class_linker->ResolveType(type_idx, referrer);
@@ -125,7 +126,7 @@
   }
   // Make sure that the result is an instance of the type this method was expected to return.
   mirror::Class* return_type = self->GetCurrentMethod(nullptr)->GetReturnType(true /* resolve */,
-                                                                              sizeof(void*));
+                                                                              kRuntimePointerSize);
 
   if (!o->InstanceOf(return_type)) {
     Runtime::Current()->GetJavaVM()->JniAbortF(nullptr,
@@ -188,7 +189,7 @@
       StackHandleScope<1> hs(soa.Self());
       auto h_interface_method(hs.NewHandle(soa.Decode<mirror::Method*>(interface_method_jobj)));
       // This can cause thread suspension.
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       mirror::Class* result_type =
           h_interface_method->GetArtMethod()->GetReturnType(true /* resolve */, pointer_size);
       mirror::Object* result_ref = soa.Decode<mirror::Object*>(result);
@@ -208,10 +209,10 @@
       mirror::Class* proxy_class = rcvr->GetClass();
       mirror::Method* interface_method = soa.Decode<mirror::Method*>(interface_method_jobj);
       ArtMethod* proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(
-          interface_method->GetArtMethod(), sizeof(void*));
-      auto virtual_methods = proxy_class->GetVirtualMethodsSlice(sizeof(void*));
+          interface_method->GetArtMethod(), kRuntimePointerSize);
+      auto virtual_methods = proxy_class->GetVirtualMethodsSlice(kRuntimePointerSize);
       size_t num_virtuals = proxy_class->NumVirtualMethods();
-      size_t method_size = ArtMethod::Size(sizeof(void*));
+      size_t method_size = ArtMethod::Size(kRuntimePointerSize);
       // Rely on the fact that the methods are contiguous to determine the index of the method in
       // the slice.
       int throws_index = (reinterpret_cast<uintptr_t>(proxy_method) -
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index 331de91..a81a7e7 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
@@ -86,7 +87,7 @@
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
-static constexpr size_t GetConstExprPointerSize(InstructionSet isa) {
+static constexpr PointerSize GetConstExprPointerSize(InstructionSet isa) {
   // constexpr must be a return statement.
   return (isa == kArm || isa == kThumb2) ? kArmPointerSize :
          isa == kArm64 ? kArm64PointerSize :
@@ -94,14 +95,14 @@
          isa == kMips64 ? kMips64PointerSize :
          isa == kX86 ? kX86PointerSize :
          isa == kX86_64 ? kX86_64PointerSize :
-         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", 0) :
-         (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+         isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", PointerSize::k32) :
+         (LOG(FATAL) << "Unknown instruction set" << isa, PointerSize::k32);
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
 static constexpr size_t GetCalleeSaveReturnPcOffset(InstructionSet isa,
                                                     Runtime::CalleeSaveType type) {
-  return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa);
+  return GetCalleeSaveFrameSize(isa, type) - static_cast<size_t>(GetConstExprPointerSize(isa));
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 4e4f851..4686a51 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/class-inl.h"
@@ -32,8 +33,8 @@
     uint32_t type_idx, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
-    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, sizeof(void*)); \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, kRuntimePointerSize); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -59,7 +60,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -85,7 +86,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
     byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
     mirror::Object* obj; \
@@ -136,7 +137,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \
@@ -146,7 +147,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
@@ -170,7 +171,7 @@
   return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
                                                                offset, allocator_type); \
 } \
-extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( /* NOLINT */ \
     mirror::String* string, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   StackHandleScope<1> hs(self); \
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 968ac53..8db69a3 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,7 +20,7 @@
 namespace art {
 
 // Assignable test for code, won't throw.  Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(klass != nullptr);
   DCHECK(ref_class != nullptr);
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index f3a0d2f..86fb881 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -50,16 +50,16 @@
 extern "C" int art_quick_set64_static(uint32_t, int64_t);
 extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
 extern "C" int art_quick_set_obj_static(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_instance(uint32_t, void*);
-extern "C" uint8_t art_quick_get_boolean_instance(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_static(uint32_t);
-extern "C" uint8_t art_quick_get_boolean_static(uint32_t);
-extern "C" int16_t art_quick_get_short_instance(uint32_t, void*);
-extern "C" uint16_t art_quick_get_char_instance(uint32_t, void*);
-extern "C" int16_t art_quick_get_short_static(uint32_t);
-extern "C" uint16_t art_quick_get_char_static(uint32_t);
-extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
-extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" ssize_t art_quick_get_byte_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_boolean_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_byte_static(uint32_t);
+extern "C" size_t art_quick_get_boolean_static(uint32_t);
+extern "C" ssize_t art_quick_get_short_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_char_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_short_static(uint32_t);
+extern "C" size_t art_quick_get_char_static(uint32_t);
+extern "C" ssize_t art_quick_get32_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get32_static(uint32_t);
 extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
 extern "C" int64_t art_quick_get64_static(uint32_t);
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
@@ -120,6 +120,8 @@
 extern "C" void art_quick_throw_div_zero();
 extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
 extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal(uintptr_t address);
 extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_string_bounds(int32_t index, int32_t limit);
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_EXTERNS_H_
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 5dafa8b..2a206c2 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -73,11 +73,13 @@
 
   // JNI
   qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
   qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
   qpoints->pJniMethodEnd = JniMethodEnd;
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
 
   // Locks
@@ -114,6 +116,7 @@
   qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
 
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index c019cae..f35c2fe 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -29,39 +29,51 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
-    LOG(INFO) << "Deopting:";
-    self->Dump(LOG(INFO));
+    if (single_frame) {
+      // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+      // specialized visitor that will show whether a method is Quick or Shadow.
+    } else {
+      LOG(INFO) << "Deopting:";
+      self->Dump(LOG(INFO));
+    }
   }
 
   self->AssertHasDeoptimizationContext();
-  self->SetException(Thread::GetDeoptimizationException());
-  self->QuickDeliverException();
+  QuickExceptionHandler exception_handler(self, true);
+  if (single_frame) {
+    exception_handler.DeoptimizeSingleFrame();
+  } else {
+    exception_handler.DeoptimizeStack();
+  }
+  uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
+  if (exception_handler.IsFullFragmentDone()) {
+    exception_handler.DoLongJump(true);
+  } else {
+    exception_handler.DeoptimizePartialFragmentFixup(return_pc);
+    // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+    // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+    // line.
+    exception_handler.DoLongJump(false);
+  }
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self, false);
+}
+
+// This is called directly from compiled code by an HDeoptimize.
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-
-  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
-  // specialized visitor that will show whether a method is Quick or Shadow.
-
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
-
-  QuickExceptionHandler exception_handler(self, true);
-  exception_handler.DeoptimizeSingleFrame();
-  exception_handler.UpdateInstrumentationStack();
-  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
-  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
-  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
-  // line.
-  exception_handler.DoLongJump(false);
+  artDeoptimizeImpl(self, true);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index b12b118..c045e84 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -31,7 +31,7 @@
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
 }
 
@@ -39,7 +39,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
 }
 
@@ -48,14 +48,14 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
   return ResolveStringFromCode(caller, string_idx);
 }
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index f5b68fa..08e0d6e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -52,10 +52,13 @@
 // JNI entrypoints.
 // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
 extern uint32_t JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern uint32_t JniMethodFastStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
                                      Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_enum.h b/runtime/entrypoints/quick/quick_entrypoints_enum.h
index 5a95491..8de1137 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_enum.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_enum.h
@@ -36,7 +36,7 @@
 std::ostream& operator<<(std::ostream& os, const QuickEntrypointEnum& kind);
 
 // Translate a QuickEntrypointEnum value to the corresponding ThreadOffset.
-template <size_t pointer_size>
+template <PointerSize pointer_size>
 static ThreadOffset<pointer_size> GetThreadOffset(QuickEntrypointEnum trampoline) {
   switch (trampoline)
   {  // NOLINT(whitespace/braces)
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 79d1c13..74c928a 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,7 +33,7 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
@@ -51,16 +51,16 @@
   V(Set64Static, int, uint32_t, int64_t) \
   V(SetObjInstance, int, uint32_t, void*, void*) \
   V(SetObjStatic, int, uint32_t, void*) \
-  V(GetByteInstance, int8_t, uint32_t, void*) \
-  V(GetBooleanInstance, uint8_t, uint32_t, void*) \
-  V(GetByteStatic, int8_t, uint32_t) \
-  V(GetBooleanStatic, uint8_t, uint32_t) \
-  V(GetShortInstance, int16_t, uint32_t, void*) \
-  V(GetCharInstance, uint16_t, uint32_t, void*) \
-  V(GetShortStatic, int16_t, uint32_t) \
-  V(GetCharStatic, uint16_t, uint32_t) \
-  V(Get32Instance, int32_t, uint32_t, void*) \
-  V(Get32Static, int32_t, uint32_t) \
+  V(GetByteInstance, ssize_t, uint32_t, void*) \
+  V(GetBooleanInstance, size_t, uint32_t, void*) \
+  V(GetByteStatic, ssize_t, uint32_t) \
+  V(GetBooleanStatic, size_t, uint32_t) \
+  V(GetShortInstance, ssize_t, uint32_t, void*) \
+  V(GetCharInstance, size_t, uint32_t, void*) \
+  V(GetShortStatic, ssize_t, uint32_t) \
+  V(GetCharStatic, size_t, uint32_t) \
+  V(Get32Instance, ssize_t, uint32_t, void*) \
+  V(Get32Static, ssize_t, uint32_t) \
   V(Get64Instance, int64_t, uint32_t, void*) \
   V(Get64Static, int64_t, uint32_t) \
   V(GetObjInstance, void*, uint32_t, void*) \
@@ -72,8 +72,10 @@
   V(HandleFillArrayData, void, void*, void*) \
 \
   V(JniMethodStart, uint32_t, Thread*) \
+  V(JniMethodFastStart, uint32_t, Thread*) \
   V(JniMethodStartSynchronized, uint32_t, jobject, Thread*) \
   V(JniMethodEnd, void, uint32_t, Thread*) \
+  V(JniMethodFastEnd, void, uint32_t, Thread*) \
   V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
@@ -140,6 +142,7 @@
   V(ThrowNoSuchMethod, void, int32_t) \
   V(ThrowNullPointer, void, void) \
   V(ThrowStackOverflow, void, void*) \
+  V(ThrowStringBounds, void, int32_t, int32_t) \
   V(Deoptimize, void, void) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
@@ -163,9 +166,39 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
-  V(ReadBarrierMark, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg01, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg02, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg03, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg04, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg05, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg06, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg07, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg08, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg09, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg10, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg11, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg12, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg13, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg14, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg15, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg16, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg17, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg18, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg19, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg20, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg21, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg22, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg23, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg24, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg25, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg26, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg27, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
-  V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
+  V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
+\
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
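The entries above form an X-macro list: each V(name, return_type, arg_types...) row is re-expanded by whatever macro the includer passes in, so widening the Get*/InstanceofNonTrivial return types to size_t/ssize_t updates every generated declaration at once. A minimal sketch of how such a list is typically consumed, assuming the header's QUICK_ENTRYPOINT_LIST(V) macro; the struct and helper-macro names are illustrative, and the p-prefixed field convention follows what entrypoints_order_test.cc (further down) checks:

  struct QuickEntryPointsSketch {
    // One function-pointer field per V(...) row, e.g. V(JniMethodFastStart, uint32_t, Thread*)
    // expands to: uint32_t (*pJniMethodFastStart)(Thread*);
  #define ENTRYPOINT_FIELD(name, rettype, ...) rettype (*p ## name)(__VA_ARGS__);
    QUICK_ENTRYPOINT_LIST(ENTRYPOINT_FIELD)
  #undef ENTRYPOINT_FIELD
  };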
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index a245f18..1a12bd4 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -55,9 +55,7 @@
   return field;
 }
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
-                                           ArtMethod* referrer,
-                                           Thread* self)
+extern "C" ssize_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -71,9 +69,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
-                                               ArtMethod* referrer,
-                                               Thread* self)
+extern "C" size_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -87,9 +83,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" ssize_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -103,9 +97,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" size_t artGetCharStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -119,9 +111,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
-                                           ArtMethod* referrer,
-                                           Thread* self)
+extern "C" size_t artGet32StaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t));
@@ -173,10 +163,10 @@
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx,
-                                             mirror::Object* obj,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" ssize_t artGetByteInstanceFromCode(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              ArtMethod* referrer,
+                                              Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -194,10 +184,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx,
-                                                 mirror::Object* obj,
-                                                 ArtMethod* referrer,
-                                                 Thread* self)
+extern "C" size_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+                                                mirror::Object* obj,
+                                                ArtMethod* referrer,
+                                                Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -214,7 +204,7 @@
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx,
+extern "C" ssize_t artGetShortInstanceFromCode(uint32_t field_idx,
                                                mirror::Object* obj,
                                                ArtMethod* referrer,
                                                Thread* self)
@@ -235,10 +225,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx,
-                                               mirror::Object* obj,
-                                               ArtMethod* referrer,
-                                               Thread* self)
+extern "C" size_t artGetCharInstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
@@ -256,10 +246,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx,
-                                             mirror::Object* obj,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" size_t artGet32InstanceFromCode(uint32_t field_idx,
+                                           mirror::Object* obj,
+                                           ArtMethod* referrer,
+                                           Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 8e660a2..82d5467 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "instrumentation.h"
@@ -37,7 +38,7 @@
   if (instrumentation->IsDeoptimized(method)) {
     result = GetQuickToInterpreterBridge();
   } else {
-    result = instrumentation->GetQuickCodeFor(method, sizeof(void*));
+    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
     DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
   }
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
@@ -55,7 +56,7 @@
   CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
                                      << self->GetException()->Dump();
   // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 58f256a..c06824c 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -29,6 +29,21 @@
   handle_on_stack->Assign(to_ref);
 }
 
+// Called on entry to fast JNI, push a new local reference frame only (no thread state transition).
+extern uint32_t JniMethodFastStart(Thread* self) {
+  JNIEnvExt* env = self->GetJniEnv();
+  DCHECK(env != nullptr);
+  uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+  env->local_ref_cookie = env->locals.GetSegmentState();
+
+  if (kIsDebugBuild) {
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+  }
+
+  return saved_local_ref_cookie;
+}
+
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern uint32_t JniMethodStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
@@ -73,11 +88,32 @@
   self->PopHandleScope();
 }
 
+// TODO: These should probably be templatized or macro-ized.
+// Otherwise there's just too much repetitive boilerplate.
+
 extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
   GoToRunnable(self);
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+  // Inlined fast version of GoToRunnable(self).
+
+  if (kIsDebugBuild) {
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+  }
+
+  if (UNLIKELY(self->TestAllFlags())) {
+    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
+    // is a flag raised.
+    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
+    self->CheckSuspend();
+  }
+
+  PopLocalReferences(saved_local_ref_cookie, self);
+}
+
 extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
                                      Thread* self) {
   GoToRunnable(self);
@@ -85,6 +121,10 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
+// TODO: Add a JniMethodFastEndWithReference variant.
+// (A synchronized flavor is probably unnecessary, since that path already
+// has to perform atomic operations.)
+
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
                                                              uint32_t saved_local_ref_cookie,
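Taken together, JniMethodFastStart and JniMethodFastEnd give @FastNative methods a lighter bracket than the regular pair: the thread never leaves the Runnable state, so only the local-reference cookie is saved on entry and a suspend check is performed on the way out. A rough sketch of the bracketing the JNI stub is expected to emit, written as C++ for illustration only (the native function, its arguments, and the direct calls are hypothetical; the real stub is generated machine code):

  uint32_t cookie = JniMethodFastStart(self);                      // save local-ref cookie, stay Runnable
  jint result = Java_com_example_Foo_nativeAdd(env, jobj, a, b);   // hypothetical @FastNative body
  JniMethodFastEnd(cookie, self);                                  // suspend check + pop local references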
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 5256fea..ea9f7b0 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -29,7 +29,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an exception.
+// Called by generated code to throw an exception.
 extern "C" NO_RETURN void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   /*
@@ -48,17 +48,27 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw a NPE exception.
+// Called by generated code to throw a null pointer exception (NPE).
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  // We come from an explicit check in the generated code. This path is triggered
+  // only if the object is indeed null.
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ false, 0U);
+  self->QuickDeliverException();
+}
+
+// Installed by a signal handler to throw a NPE exception.
+extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
-  ThrowNullPointerExceptionFromDexPC();
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
   self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an arithmetic divide by zero exception.
+// Called by generated code to throw an arithmetic divide by zero exception.
 extern "C" NO_RETURN void artThrowDivZeroFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -66,7 +76,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an array index out of bounds exception.
+// Called by generated code to throw an array index out of bounds exception.
 extern "C" NO_RETURN void artThrowArrayBoundsFromCode(int index, int length, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -74,6 +84,14 @@
   self->QuickDeliverException();
 }
 
+// Called by generated code to throw a string index out of bounds exception.
+extern "C" NO_RETURN void artThrowStringBoundsFromCode(int index, int length, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ThrowStringIndexOutOfBoundsException(index, length);
+  self->QuickDeliverException();
+}
+
 extern "C" NO_RETURN void artThrowStackOverflowFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
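artThrowStringBoundsFromCode mirrors the array-bounds entrypoint but reports a StringIndexOutOfBoundsException, matching the pThrowStringBounds slot added to the entrypoint list above. A conceptual C++ equivalent of the check the code generator emits for an out-of-range string index (the real check is compiled code that tail-calls the entrypoint; str and index are hypothetical locals):

  if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(str->GetLength())) {
    // Never returns; unwinds to the catch handler via QuickDeliverException.
    artThrowStringBoundsFromCode(index, str->GetLength(), Thread::Current());
  }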
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 41033ab..c67379a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "callee_save_frame.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -45,7 +46,7 @@
   static constexpr size_t kBytesStackArgLocation = 4;
   // Frame size in bytes of a callee-save frame for RefsAndArgs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
-      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -74,11 +75,11 @@
   static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm::ArmCalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm::ArmCalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm::ArmCalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm::ArmCalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm::ArmCalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -112,11 +113,11 @@
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm64::Arm64CalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
+      arm64::Arm64CalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -306,7 +307,7 @@
 
   static ArtMethod* GetCallingMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    return GetCalleeSaveMethodCaller(sp, Runtime::kRefsAndArgs);
+    return GetCalleeSaveMethodCaller(sp, Runtime::kSaveRefsAndArgs);
   }
 
   static ArtMethod* GetOuterMethod(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -318,7 +319,7 @@
 
   static uint32_t GetCallingDexPc(ArtMethod** sp) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
+    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
@@ -366,7 +367,7 @@
     // next register is even.
     static_assert(!kQuickDoubleRegAlignedFloatBackFilled || kNumQuickFprArgs % 2 == 0,
                   "Number of Quick FPR arguments not even");
-    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   }
 
   virtual ~QuickArgumentVisitor() {}
@@ -654,12 +655,12 @@
 
   JValue tmp_value;
   ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
-      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+      StackedShadowFrameType::kDeoptimizationShadowFrame, false);
   ManagedStack fragment;
 
   DCHECK(!method->IsNative()) << PrettyMethod(method);
   uint32_t shorty_len = 0;
-  ArtMethod* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  ArtMethod* non_proxy_method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   const DexFile::CodeItem* code_item = non_proxy_method->GetCodeItem();
   DCHECK(code_item != nullptr) << PrettyMethod(method);
   const char* shorty = non_proxy_method->GetShorty(&shorty_len);
@@ -667,7 +668,7 @@
   JValue result;
 
   if (deopt_frame != nullptr) {
-    // Coming from single-frame deopt.
+    // Coming from partial-fragment deopt.
 
     if (kIsDebugBuild) {
       // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
@@ -681,7 +682,7 @@
     }
 
     if (VLOG_IS_ON(deopt)) {
-      // Print out the stack to verify that it was a single-frame deopt.
+      // Print out the stack to verify that it was a partial-fragment deopt.
       LOG(INFO) << "Continue-ing from deopt. Stack is:";
       QuickExceptionHandler::DumpFramesWithType(self, true);
     }
@@ -689,7 +690,6 @@
     mirror::Throwable* pending_exception = nullptr;
     bool from_code = false;
     self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
-    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -755,7 +755,12 @@
 
   // Request a stack deoptimization if needed
   ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+  uintptr_t caller_pc = QuickArgumentVisitor::GetCallingPc(sp);
+  // If caller_pc is the instrumentation exit stub, the stub will check whether deoptimization
+  // should be done, and it knows the real return pc.
+  if (UNLIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) &&
+               Dbg::IsForcedInterpreterNeededForUpcall(self, caller) &&
+               Runtime::Current()->IsDeoptimizeable(caller_pc))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
     self->PushDeoptimizationContext(
@@ -855,7 +860,7 @@
   jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
 
   // Placing arguments into args vector and remove the receiver.
-  ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(sizeof(void*));
+  ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
                                        << PrettyMethod(non_proxy_method);
   std::vector<jvalue> args;
@@ -868,12 +873,15 @@
   args.erase(args.begin());
 
   // Convert proxy method into expected interface method.
-  ArtMethod* interface_method = proxy_method->FindOverriddenMethod(sizeof(void*));
+  ArtMethod* interface_method = proxy_method->FindOverriddenMethod(kRuntimePointerSize);
   DCHECK(interface_method != nullptr) << PrettyMethod(proxy_method);
   DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
   self->EndAssertNoThreadSuspension(old_cause);
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
   jobject interface_method_jobj = soa.AddLocalReference<jobject>(
-      mirror::Method::CreateFromArtMethod(soa.Self(), interface_method));
+      mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(),
+                                                                      interface_method));
 
   // All naked Object*s should now be in jobjects, so its safe to go into the main invoke code
   // that performs allocations.
@@ -1031,10 +1039,10 @@
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
         CHECK(receiver != nullptr) << invoke_type;
-        called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
+        called = receiver->GetClass()->FindVirtualMethodForVirtual(called, kRuntimePointerSize);
       } else if (invoke_type == kInterface) {
         CHECK(receiver != nullptr) << invoke_type;
-        called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+        called = receiver->GetClass()->FindVirtualMethodForInterface(called, kRuntimePointerSize);
       } else {
         DCHECK_EQ(invoke_type, kSuper);
         CHECK(caller != nullptr) << invoke_type;
@@ -1047,10 +1055,10 @@
         mirror::Class* ref_class = linker->ResolveReferencedClassOfMethod(
             called_method.dex_method_index, dex_cache, class_loader);
         if (ref_class->IsInterface()) {
-          called = ref_class->FindVirtualMethodForInterfaceSuper(called, sizeof(void*));
+          called = ref_class->FindVirtualMethodForInterfaceSuper(called, kRuntimePointerSize);
         } else {
           called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-              called->GetMethodIndex(), sizeof(void*));
+              called->GetMethodIndex(), kRuntimePointerSize);
         }
       }
 
@@ -1064,7 +1072,7 @@
       // FindVirtualMethodFor... This is ok for FindDexMethodIndexInOtherDexFile that only cares
       // about the name and signature.
       uint32_t update_dex_cache_method_index = called->GetDexMethodIndex();
-      if (!called->HasSameDexCacheResolvedMethods(caller, sizeof(void*))) {
+      if (!called->HasSameDexCacheResolvedMethods(caller, kRuntimePointerSize)) {
         // Calling from one dex file to another, need to compute the method index appropriate to
         // the caller's dex file. Since we get here only if the original called was a runtime
         // method, we've got the correct dex_file and a dex_method_idx from above.
@@ -1078,8 +1086,10 @@
       }
       if ((update_dex_cache_method_index != DexFile::kDexNoIndex) &&
           (caller->GetDexCacheResolvedMethod(
-              update_dex_cache_method_index, sizeof(void*)) != called)) {
-        caller->SetDexCacheResolvedMethod(update_dex_cache_method_index, called, sizeof(void*));
+              update_dex_cache_method_index, kRuntimePointerSize) != called)) {
+        caller->SetDexCacheResolvedMethod(update_dex_cache_method_index,
+                                          called,
+                                          kRuntimePointerSize);
       }
     } else if (invoke_type == kStatic) {
       const auto called_dex_method_idx = called->GetDexMethodIndex();
@@ -1089,7 +1099,9 @@
       // b/19175856
       if (called->GetDexFile() == called_method.dex_file &&
           called_method.dex_method_index != called_dex_method_idx) {
-        called->GetDexCache()->SetResolvedMethod(called_dex_method_idx, called, sizeof(void*));
+        called->GetDexCache()->SetResolvedMethod(called_dex_method_idx,
+                                                 called,
+                                                 kRuntimePointerSize);
       }
     }
 
@@ -1623,7 +1635,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* method = **m;
 
-    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
 
     uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
 
@@ -2042,7 +2054,7 @@
 static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, Thread* self,
                                      ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
@@ -2137,11 +2149,7 @@
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> cls(hs.NewHandle(this_object->GetClass()));
 
-  // The optimizing compiler currently does not inline methods that have an interface
-  // invocation. We use the outer method directly to avoid fetching a stack map, which is
-  // more expensive.
-  ArtMethod* caller_method = QuickArgumentVisitor::GetOuterMethod(sp);
-  DCHECK_EQ(caller_method, QuickArgumentVisitor::GetCallingMethod(sp));
+  ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
 
   // Fetch the dex_method_idx of the target interface method from the caller.
   uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
@@ -2162,23 +2170,22 @@
   }
 
   ArtMethod* interface_method = caller_method->GetDexCacheResolvedMethod(
-      dex_method_idx, sizeof(void*));
+      dex_method_idx, kRuntimePointerSize);
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
   ArtMethod* method = nullptr;
-  ImTable* imt = cls->GetImt(sizeof(void*));
+  ImTable* imt = cls->GetImt(kRuntimePointerSize);
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
-    uint32_t imt_index = interface_method->GetDexMethodIndex();
-    ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+    ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), kRuntimePointerSize);
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
-      ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+      ImtConflictTable* current_table = conflict_method->GetImtConflictTable(kRuntimePointerSize);
       DCHECK(current_table != nullptr);
-      method = current_table->Lookup(interface_method, sizeof(void*));
+      method = current_table->Lookup(interface_method, kRuntimePointerSize);
     } else {
       // It seems we aren't really a conflict method!
-      method = cls->FindVirtualMethodForInterface(interface_method, sizeof(void*));
+      method = cls->FindVirtualMethodForInterface(interface_method, kRuntimePointerSize);
     }
     if (method != nullptr) {
       return GetTwoWordSuccessValue(
@@ -2187,7 +2194,7 @@
     }
 
     // No match, use the IfTable.
-    method = cls->FindVirtualMethodForInterface(interface_method, sizeof(void*));
+    method = cls->FindVirtualMethodForInterface(interface_method, kRuntimePointerSize);
     if (UNLIKELY(method == nullptr)) {
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(
           interface_method, this_object, caller_method);
@@ -2216,14 +2223,15 @@
       CHECK(self->IsExceptionPending());
       return GetTwoWordFailureValue();  // Failure.
     }
-    interface_method = caller_method->GetDexCacheResolvedMethod(dex_method_idx, sizeof(void*));
+    interface_method =
+        caller_method->GetDexCacheResolvedMethod(dex_method_idx, kRuntimePointerSize);
     DCHECK(!interface_method->IsRuntimeMethod());
   }
 
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
-  uint32_t imt_index = interface_method->GetDexMethodIndex();
-  ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+  uint32_t imt_index = interface_method->GetImtIndex();
+  ArtMethod* conflict_method = imt->Get(imt_index, kRuntimePointerSize);
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2234,9 +2242,9 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      imt->Set(imt_index % ImTable::kSize,
+      imt->Set(imt_index,
                new_conflict_method,
-               sizeof(void*));
+               kRuntimePointerSize);
     }
   }
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 01e22a4..553c092 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -80,10 +80,16 @@
 // This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
 TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
   // We have to use a define here as the callee_save_frame.h functions are constexpr.
-#define CHECK_FRAME_SIZE(isa)                                                                     \
-  CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
-  CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly));       \
-  CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+#define CHECK_FRAME_SIZE(isa)                                                 \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsAndArgs,                                   \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsAndArgs));     \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveRefsOnly,                                      \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsOnly));        \
+  CheckFrameSize(isa,                                                         \
+                 Runtime::kSaveAllCalleeSaves,                                \
+                 GetCalleeSaveFrameSize(isa, Runtime::kSaveAllCalleeSaves))
 
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
@@ -108,12 +114,12 @@
   // Ensure that the computation in callee_save_frame.h is correct.
   // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
   // sizeof(void*), which is wrong when the target bitwidth is not the same as the host's.
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsAndArgs));
-  CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly));
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveAll,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAll));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsOnly,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly));
+  CheckPCOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves));
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c621672..004cdc4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -112,15 +112,17 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
-                        sizeof(void*) * 6);
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
+                        sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, jni_entrypoints,
+                        sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, thread_local_start, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, thread_local_pos, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, mterp_current_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
@@ -209,11 +211,14 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObjectWithBoundCheck, pAputObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pHandleFillArrayData, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHandleFillArrayData, pJniMethodStart, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodFastStart,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastStart, pJniMethodStartSynchronized,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodFastEnd, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEnd, pJniMethodEndSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
@@ -285,7 +290,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowDivZero, pThrowNoSuchMethod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNoSuchMethod, pThrowNullPointer, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNullPointer, pThrowStackOverflow, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pDeoptimize, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pThrowStringBounds, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStringBounds, pDeoptimize, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeoptimize, pA64Load, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, sizeof(void*));
 
@@ -321,8 +327,66 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMark, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMark, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg00, pReadBarrierMarkReg01,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg01, pReadBarrierMarkReg02,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg02, pReadBarrierMarkReg03,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg03, pReadBarrierMarkReg04,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg04, pReadBarrierMarkReg05,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg05, pReadBarrierMarkReg06,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg06, pReadBarrierMarkReg07,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg07, pReadBarrierMarkReg08,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg08, pReadBarrierMarkReg09,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg09, pReadBarrierMarkReg10,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg10, pReadBarrierMarkReg11,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg11, pReadBarrierMarkReg12,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg12, pReadBarrierMarkReg13,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg13, pReadBarrierMarkReg14,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg14, pReadBarrierMarkReg15,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg15, pReadBarrierMarkReg16,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg16, pReadBarrierMarkReg17,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg17, pReadBarrierMarkReg18,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg18, pReadBarrierMarkReg19,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg19, pReadBarrierMarkReg20,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg20, pReadBarrierMarkReg21,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg21, pReadBarrierMarkReg22,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg22, pReadBarrierMarkReg23,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg23, pReadBarrierMarkReg24,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg24, pReadBarrierMarkReg25,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg25, pReadBarrierMarkReg26,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg26, pReadBarrierMarkReg27,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg27, pReadBarrierMarkReg28,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg28, pReadBarrierMarkReg29,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h
index 198f3fa..7faa2dc 100644
--- a/runtime/experimental_flags.h
+++ b/runtime/experimental_flags.h
@@ -26,7 +26,8 @@
   // The actual flag values.
   enum {
     kNone           = 0x0000,
-    kLambdas        = 0x0001,
+    kAgents         = 0x0001,  // 0b00000001
+    kRuntimePlugins = 0x0002,  // 0b00000010
   };
 
   constexpr ExperimentalFlags() : value_(0x0000) {}
@@ -64,8 +65,12 @@
 
 inline std::ostream& operator<<(std::ostream& stream, const ExperimentalFlags& e) {
   bool started = false;
-  if (e & ExperimentalFlags::kLambdas) {
-    stream << (started ? "|" : "") << "kLambdas";
+  if (e & ExperimentalFlags::kAgents) {
+    stream << (started ? "|" : "") << "kAgents";
+    started = true;
+  }
+  if (e & ExperimentalFlags::kRuntimePlugins) {
+    stream << (started ? "|" : "") << "kRuntimePlugins";
     started = true;
   }
   if (!started) {
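With kLambdas removed, the streaming operator only recognizes kAgents and kRuntimePlugins and falls back to printing "kNone" when neither bit is set. A small usage sketch, assuming the operator| and operator& overloads defined elsewhere in this header behave as ordinary flag operations:

  ExperimentalFlags flags;
  flags = flags | ExperimentalFlags::kAgents;           // enable agent support
  flags = flags | ExperimentalFlags::kRuntimePlugins;   // enable runtime plugins
  std::ostringstream oss;
  oss << flags;                                         // yields "kAgents|kRuntimePlugins"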
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 9f073a6..f86921c 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -341,7 +341,7 @@
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
   // TODO: Check linear alloc and image.
-  DCHECK_ALIGNED(ArtMethod::Size(sizeof(void*)), sizeof(void*))
+  DCHECK_ALIGNED(ArtMethod::Size(kRuntimePointerSize), sizeof(void*))
       << "ArtMethod is not pointer aligned";
   if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
     VLOG(signals) << "no method";
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 625b1e8..56e0fb7 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -96,6 +96,14 @@
 
   bool Action(int sig, siginfo_t* siginfo, void* context) OVERRIDE;
 
+  static bool IsValidImplicitCheck(siginfo_t* siginfo) {
+    // Our implicit NPE checks always limit the range to a page.
+    // Note that the runtime will do more exhaustive checks (that we cannot
+    // reasonably do in signal-processing code) based on the faulting dex
+    // instruction.
+    return CanDoImplicitNullCheckOn(reinterpret_cast<uintptr_t>(siginfo->si_addr));
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(NullPointerHandler);
 };
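IsValidImplicitCheck is the coarse filter on the signal side: if the faulting address is one that an implicit null check is allowed to touch (within the first page), the fault is treated as a candidate NPE and control is redirected to artThrowNullPointerExceptionFromSignal with that address, which then calls ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr) as shown earlier. A simplified sketch of that flow (illustrative only; the real handler also validates the faulting method and rewrites the signal context so the entrypoint runs on the faulting thread, rather than calling it directly):

  bool HandlePossibleImplicitNpe(siginfo_t* siginfo, Thread* self) {
    if (!NullPointerHandler::IsValidImplicitCheck(siginfo)) {
      return false;  // Not an address an implicit check could have produced.
    }
    uintptr_t fault_addr = reinterpret_cast<uintptr_t>(siginfo->si_addr);
    artThrowNullPointerExceptionFromSignal(fault_addr, self);  // Conceptually; never returns.
  }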
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 4e40aea..35bcb18 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -175,6 +175,11 @@
   card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
+void ModUnionTableReferenceCache::ClearTable() {
+  cleared_cards_.clear();
+  references_.clear();
+}
+
 class AddToReferenceArrayVisitor {
  public:
   AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
@@ -318,6 +323,18 @@
   const std::set<mirror::Object*>& references_;
 };
 
+class EmptyMarkObjectVisitor : public MarkObjectVisitor {
+ public:
+  mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE { return obj; }
+  void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {}
+};
+
+void ModUnionTable::FilterCards() {
+  EmptyMarkObjectVisitor visitor;
+  // Use empty visitor since filtering is automatically done by UpdateAndMarkReferences.
+  UpdateAndMarkReferences(&visitor);
+}
+
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
@@ -364,6 +381,31 @@
   }
 }
 
+void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  CardTable* const card_table = heap_->GetCardTable();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  for (uint8_t* card : cleared_cards_) {
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+  // This may visit the same card twice. TODO: Avoid the duplicate visits.
+  for (const auto& pair : references_) {
+    const uint8_t* card = pair.first;
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+}
+
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* const card_table = heap_->GetCardTable();
   std::vector<mirror::HeapReference<mirror::Object>*> cards_references;
@@ -489,6 +531,10 @@
   card_table->ModifyCardsAtomic(space_->Begin(), space_->End(), AgeCardVisitor(), visitor);
 }
 
+void ModUnionTableCardCache::ClearTable() {
+  card_bitmap_->Bitmap::Clear();
+}
+
 // Mark all references to the alloc space(s).
 void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   // TODO: Needs better support for multi-images? b/26317072
@@ -502,6 +548,22 @@
       0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, bit_visitor);
 }
 
+void ModUnionTableCardCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  card_bitmap_->VisitSetBits(
+      0,
+      RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize,
+      [this, callback, arg](size_t bit_index) {
+        const uintptr_t start = card_bitmap_->AddrFromBitIndex(bit_index);
+        DCHECK(space_->HasAddress(reinterpret_cast<mirror::Object*>(start)))
+            << start << " " << *space_;
+        space_->GetLiveBitmap()->VisitMarkedRange(start,
+                                                  start + CardTable::kCardSize,
+                                                  [this, callback, arg](mirror::Object* obj) {
+          callback(obj, arg);
+        });
+      });
+}
+
 void ModUnionTableCardCache::Dump(std::ostream& os) {
   os << "ModUnionTable dirty cards: [";
   // TODO: Find cleaner way of doing this.
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a7a4246..6aa2417 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -63,11 +63,17 @@
   // Set all the cards.
   virtual void SetCards() = 0;
 
+  // Clear all of the table.
+  virtual void ClearTable() = 0;
+
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
+  // Visit all of the objects that may contain references to other spaces.
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) = 0;
+
   // Verification: sanity checks that we don't have clean cards which conflict with our cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
   // SpaceBitmap::VisitMarkedRange and VisitMarkedRange can't know if the callback will modify the
@@ -78,6 +84,9 @@
   // doesn't need to be aligned.
   virtual bool ContainsCardFor(uintptr_t addr) = 0;
 
+  // Filter out cards that don't need to be marked. Automatically done with UpdateAndMarkReferences.
+  void FilterCards();
+
   virtual void Dump(std::ostream& os) = 0;
 
   space::ContinuousSpace* GetSpace() {
@@ -115,6 +124,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
   void Verify() OVERRIDE
@@ -130,6 +143,8 @@
 
   virtual void SetCards() OVERRIDE;
 
+  virtual void ClearTable() OVERRIDE;
+
  protected:
   // Cleared card array, used to update the mod-union table.
   ModUnionTable::CardSet cleared_cards_;
@@ -156,6 +171,10 @@
       REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Nothing to verify.
   virtual void Verify() OVERRIDE {}
 
@@ -163,9 +182,10 @@
 
   virtual bool ContainsCardFor(uintptr_t addr) OVERRIDE;
 
-  // Sets all the cards in the mod union table to be marked.
   virtual void SetCards() OVERRIDE;
 
+  virtual void ClearTable() OVERRIDE;
+
  protected:
   // Cleared card bitmap, used to update the mod-union table.
   std::unique_ptr<CardBitmap> card_bitmap_;
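The three additions to the ModUnionTable interface (ClearTable, VisitObjects, FilterCards) let a collector prune and walk the table without a full marking pass. A rough usage sketch from a collector's point of view (the call sites and their ordering are assumptions for illustration, not taken from this patch):

  // table is an accounting::ModUnionTable* kept by the heap for an image or zygote space.
  table->FilterCards();                       // drop cards that reference no other spaces
  table->VisitObjects(visit_callback, arg);   // walk objects behind the remaining cards
  table->ClearTable();                        // reset cards/references for the next cycle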
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index bd023b3..522f236 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -17,10 +17,11 @@
 #include "allocation_record.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "stack.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -38,7 +39,7 @@
 }
 
 void AllocRecordObjectMap::SetProperties() {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // Check whether there's a system property overriding the max number of records.
   const char* propertyName = "dalvik.vm.allocTrackerMax";
   char allocMaxString[PROPERTY_VALUE_MAX];
@@ -88,7 +89,7 @@
       max_stack_depth_ = value;
     }
   }
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 AllocRecordObjectMap::~AllocRecordObjectMap() {
@@ -112,7 +113,7 @@
     for (size_t i = 0, depth = record.GetDepth(); i < depth; ++i) {
       const AllocRecordStackTraceElement& element = record.StackElement(i);
       DCHECK(element.GetMethod() != nullptr);
-      element.GetMethod()->VisitRoots(buffered_visitor, sizeof(void*));
+      element.GetMethod()->VisitRoots(buffered_visitor, kRuntimePointerSize);
     }
   }
 }
@@ -187,7 +188,7 @@
  public:
   AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         max_depth_(max_depth),
         trace_(trace_out) {}
 
@@ -200,7 +201,7 @@
     ArtMethod* m = GetMethod();
     // m may be null if we have inlined methods of unresolved classes. b/27858645
     if (m != nullptr && !m->IsRuntimeMethod()) {
-      m = m->GetInterfaceMethodIfProxy(sizeof(void*));
+      m = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
       trace_->AddStackElement(AllocRecordStackTraceElement(m, GetDexPc()));
     }
     return true;
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 50e2622..c07da5d 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -35,7 +35,7 @@
 #include "../../external/dlmalloc/malloc.h"
 #pragma GCC diagnostic pop
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 // Define dlmalloc routines from bionic that cannot be included directly because of redefining
 // symbols from the include above.
 extern "C" void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index bd84d0d..375d869 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1021,7 +1021,7 @@
 
   // First mark slots to free in the bulk free bit map without locking the
   // size bracket locks. On host, unordered_set is faster than vector + flag.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   std::vector<Run*> runs;
 #else
   std::unordered_set<Run*, hash_run, eq_run> runs;
@@ -1088,7 +1088,7 @@
     DCHECK_EQ(run->magic_num_, kMagicNum);
     // Set the bit in the bulk free bit map.
     freed_bytes += run->AddToBulkFreeList(ptr);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     if (!run->to_be_bulk_freed_) {
       run->to_be_bulk_freed_ = true;
       runs.push_back(run);
@@ -1103,7 +1103,7 @@
   // union the bulk free bit map into the thread-local free bit map
   // (for thread-local runs.)
   for (Run* run : runs) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     DCHECK(run->to_be_bulk_freed_);
     run->to_be_bulk_freed_ = false;
 #endif
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 26f5ad3..fb774a4 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -28,6 +28,78 @@
 namespace gc {
 namespace collector {
 
+inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegion(
+    mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
+  // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
+  // to gray even though the object has already been marked through. This happens if a mutator
+  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // object (changes it from white to gray and back to white), and the thread runs and
+  // incorrectly changes it from white to gray. We need to detect such "false gray" cases and
+  // change the objects back to white at the end of marking.
+  if (kUseBakerReadBarrier) {
+    // Test the bitmap first to reduce the chance of false gray cases.
+    if (bitmap->Test(ref)) {
+      return ref;
+    }
+  }
+  // This may or may not succeed, which is ok because the object may already be gray.
+  bool cas_success = false;
+  if (kUseBakerReadBarrier) {
+    cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                   ReadBarrier::GrayPtr());
+  }
+  if (bitmap->AtomicTestAndSet(ref)) {
+    // Already marked.
+    if (kUseBakerReadBarrier &&
+        cas_success &&
+        // The object could be white here if a thread gets preempted after a success at the
+        // above AtomicSetReadBarrierPointer, GC has marked through it, and the thread runs up
+        // to this point.
+        ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      // Register a "false-gray" object to change it from gray to white at the end of marking.
+      PushOntoFalseGrayStack(ref);
+    }
+  } else {
+    // Newly marked.
+    if (kUseBakerReadBarrier) {
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+    }
+    PushOntoMarkStack(ref);
+  }
+  return ref;
+}
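
The false-gray handling above hinges on the ordering of the two atomic operations: the color CAS happens before the bitmap test-and-set, so a winning CAS on an already-marked object can leave it gray. A minimal, self-contained sketch of that ordering, with hypothetical Color/Obj types standing in for the real read-barrier state, is:

#include <atomic>
#include <vector>

// Hypothetical stand-ins for the read-barrier color and the per-object mark
// bit; they only illustrate the ordering used above, not the real ART types.
enum Color { kWhite, kGray };

struct Obj {
  std::atomic<Color> color{kWhite};
  std::atomic<bool> marked{false};
};

// Sketch: try to gray the object first, then atomically test-and-set the mark
// bit. If the mark bit was already set but our gray CAS succeeded and the
// object is still gray, we produced a "false gray" object that must be set
// back to white once marking finishes.
void MarkSketch(Obj* ref,
                std::vector<Obj*>* mark_stack,
                std::vector<Obj*>* false_gray_stack) {
  if (ref->marked.load(std::memory_order_acquire)) {
    return;  // Fast path: already marked through.
  }
  Color expected = kWhite;
  const bool cas_success = ref->color.compare_exchange_strong(expected, kGray);
  if (ref->marked.exchange(true)) {
    // Somebody else marked it; remember an accidental graying so it can be undone.
    if (cas_success && ref->color.load() == kGray) {
      false_gray_stack->push_back(ref);
    }
  } else {
    // Newly marked: keep it gray until its fields are scanned.
    mark_stack->push_back(ref);
  }
}
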
+
+template<bool kGrayImmuneObject>
+inline mirror::Object* ConcurrentCopying::MarkImmuneSpace(mirror::Object* ref) {
+  if (kUseBakerReadBarrier) {
+    // The GC-running thread doesn't (need to) gray immune objects except when updating thread roots
+    // in the thread flip on behalf of suspended threads (when gc_grays_immune_objects_ is
+    // true). Also, a mutator doesn't (need to) gray an immune object after GC has updated all
+    // immune space objects (when updated_all_immune_objects_ is true).
+    if (kIsDebugBuild) {
+      if (Thread::Current() == thread_running_gc_) {
+        DCHECK(!kGrayImmuneObject ||
+               updated_all_immune_objects_.LoadRelaxed() ||
+               gc_grays_immune_objects_);
+      } else {
+        DCHECK(kGrayImmuneObject);
+      }
+    }
+    if (!kGrayImmuneObject || updated_all_immune_objects_.LoadRelaxed()) {
+      return ref;
+    }
+    // This may or may not succeed, which is ok because the object may already be gray.
+    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                    ReadBarrier::GrayPtr());
+    if (success) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      immune_gray_stack_.push_back(ref);
+    }
+  }
+  return ref;
+}
+
+template<bool kGrayImmuneObject>
 inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
   if (from_ref == nullptr) {
     return nullptr;
@@ -68,29 +140,46 @@
       return to_ref;
     }
     case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
-      // This may or may not succeed, which is ok.
-      if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-      }
-      mirror::Object* to_ref = from_ref;
-      if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-      } else {
-        // Newly marked.
-        if (kUseBakerReadBarrier) {
-          DCHECK_EQ(to_ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
-      }
-      return to_ref;
+      return MarkUnevacFromSpaceRegion(from_ref, region_space_bitmap_);
     }
     case space::RegionSpace::RegionType::kRegionTypeNone:
-      return MarkNonMoving(from_ref);
+      if (immune_spaces_.ContainsObject(from_ref)) {
+        return MarkImmuneSpace<kGrayImmuneObject>(from_ref);
+      } else {
+        return MarkNonMoving(from_ref);
+      }
     default:
       UNREACHABLE();
   }
 }
 
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+  mirror::Object* ret;
+  // TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller
+  // is array allocations.
+  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+    return from_ref;
+  }
+  // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
+  if (UNLIKELY(mark_from_read_barrier_measurements_)) {
+    ret = MarkFromReadBarrierWithMeasurements(from_ref);
+  } else {
+    ret = Mark(from_ref);
+  }
+  // Only set the mark bit for the Baker barrier.
+  if (kUseBakerReadBarrier && LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) {
+    // If the mark stack is full, the object may temporarily go from marked back to unmarked.
+    // Seeing either value is OK since the only race is doing an unnecessary Mark.
+    if (!rb_mark_bit_stack_->AtomicPushBack(ret)) {
+      // Mark stack is full, set the bit back to zero.
+      CHECK(ret->AtomicSetMarkBit(1, 0));
+      // Set rb_mark_bit_stack_full_, this is racy but OK since AtomicPushBack is thread safe.
+      rb_mark_bit_stack_full_ = true;
+    }
+  }
+  return ret;
+}
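
The mark-bit bookkeeping above is a bounded stack plus an overflow flag: if a push fails, the bit is rolled back and the flag stops further pushes. A rough sketch of the same pattern, with hypothetical globals standing in for rb_mark_bit_stack_ and rb_mark_bit_stack_full_, is:

#include <atomic>
#include <cstddef>

// Hypothetical stand-ins; only the shape of the overflow handling matters here.
struct Obj {
  std::atomic<int> mark_bit{0};
};

constexpr size_t kCapacity = 512;
Obj* g_slots[kCapacity];
std::atomic<size_t> g_top{0};
std::atomic<bool> g_stack_full{false};

// Bounded push standing in for an atomic stack's push-back; returns false when full.
bool TryPush(Obj* obj) {
  size_t index = g_top.fetch_add(1, std::memory_order_relaxed);
  if (index >= kCapacity) {
    g_top.fetch_sub(1, std::memory_order_relaxed);
    return false;
  }
  g_slots[index] = obj;
  return true;
}

// Sketch of the pattern above: set the mark bit, try to record the object, and on
// overflow roll the bit back and remember that the stack filled up.
void RecordMarked(Obj* obj) {
  int expected = 0;
  if (!g_stack_full.load(std::memory_order_relaxed) &&
      obj->mark_bit.compare_exchange_strong(expected, 1)) {
    if (!TryPush(obj)) {
      obj->mark_bit.store(0);    // The object may be marked (and pushed) again later.
      g_stack_full.store(true);  // Racy but harmless: worst case is an extra Mark call.
    }
  }
}
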
+
 inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
   DCHECK(region_space_->IsInFromSpace(from_ref));
   LockWord lw = from_ref->GetLockWord(false);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 36efca1..42816a0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,9 +17,13 @@
 #include "concurrent_copying.h"
 
 #include "art_field-inl.h"
+#include "base/enums.h"
+#include "base/histogram-inl.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
@@ -38,8 +42,20 @@
 namespace collector {
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
+// If kFilterModUnionCards is true, then we attempt to filter out cards that don't need to be dirty
+// in the mod union table. Disabled since it does not seem to help the pause much.
+static constexpr bool kFilterModUnionCards = kIsDebugBuild;
+// If kDisallowReadBarrierDuringScan is true, then the GC aborts if any read barriers occur during
+// ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
+// Only enabled for kIsDebugBuild to avoid a performance hit.
+static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow path mark stack size; increase this if the stack is getting full and it is causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
 
-ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+ConcurrentCopying::ConcurrentCopying(Heap* heap,
+                                     const std::string& name_prefix,
+                                     bool measure_read_barrier_slow_path)
     : GarbageCollector(heap,
                        name_prefix + (name_prefix.empty() ? "" : " ") +
                        "concurrent copying + mark sweep"),
@@ -47,17 +63,31 @@
       gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
                                                      kDefaultGcMarkStackSize,
                                                      kDefaultGcMarkStackSize)),
+      rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+                                                         kReadBarrierMarkStackSize,
+                                                         kReadBarrierMarkStackSize)),
+      rb_mark_bit_stack_full_(false),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
+      region_space_bitmap_(nullptr),
       heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+      measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+      rb_slow_path_ns_(0),
+      rb_slow_path_count_(0),
+      rb_slow_path_count_gc_(0),
+      rb_slow_path_histogram_lock_("Read barrier histogram lock"),
+      rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32),
+      rb_slow_path_count_total_(0),
+      rb_slow_path_count_gc_total_(0),
       rb_table_(heap_->GetReadBarrierTable()),
-      force_evacuate_all_(false) {
+      force_evacuate_all_(false),
+      immune_gray_stack_lock_("concurrent copying immune gray stack lock",
+                              kMarkSweepMarkStackLock) {
   static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
                 "The region space size and the read barrier table region size must match");
-  cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -139,19 +169,10 @@
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
       immune_spaces_.AddSpace(space);
-      const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
-          "cc zygote space bitmap";
-      // TODO: try avoiding using bitmaps for image/zygote to save space.
-      accounting::ContinuousSpaceBitmap* bitmap =
-          accounting::ContinuousSpaceBitmap::Create(bitmap_name, space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
     } else if (space == region_space_) {
       accounting::ContinuousSpaceBitmap* bitmap =
           accounting::ContinuousSpaceBitmap::Create("cc region space bitmap",
                                                     space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
       region_space_bitmap_ = bitmap;
     }
   }
@@ -165,6 +186,19 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    CHECK(false_gray_stack_.empty());
+  }
+
+  rb_mark_bit_stack_full_ = false;
+  mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.StoreRelaxed(0);
+    rb_slow_path_count_.StoreRelaxed(0);
+    rb_slow_path_count_gc_.StoreRelaxed(0);
+  }
+
   immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
@@ -175,6 +209,15 @@
   } else {
     force_evacuate_all_ = false;
   }
+  if (kUseBakerReadBarrier) {
+    updated_all_immune_objects_.StoreRelaxed(false);
+    // GC may gray immune objects in the thread flip.
+    gc_grays_immune_objects_ = true;
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      DCHECK(immune_gray_stack_.empty());
+    }
+  }
   BindBitmaps();
   if (kVerboseMode) {
     LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
@@ -185,10 +228,12 @@
     }
     LOG(INFO) << "GC end of InitializePhase";
   }
+  // Mark all of the zygote large objects without graying them.
+  MarkZygoteLargeObjects();
 }
 
 // Used to switch the thread roots of a thread from from-space refs to to-space refs.
-class ConcurrentCopying::ThreadFlipVisitor : public Closure {
+class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor {
  public:
   ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
       : concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
@@ -215,10 +260,44 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(concurrent_copying_);
+    // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
+    // only.
+    thread->VisitRoots(this);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
+  void VisitRoots(mirror::Object*** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object** root = roots[i];
+      mirror::Object* ref = *root;
+      if (ref != nullptr) {
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          *root = to_ref;
+        }
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::CompressedReference<mirror::Object>* const root = roots[i];
+      if (!root->IsNull()) {
+        mirror::Object* ref = root->AsMirrorPtr();
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          root->Assign(to_ref);
+        }
+      }
+    }
+  }
+
  private:
   ConcurrentCopying* const concurrent_copying_;
   const bool use_tlab_;
@@ -247,17 +326,103 @@
     }
     cc->is_marking_ = true;
     cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
+    if (kIsDebugBuild) {
+      cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared();
+    }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      cc->GrayAllDirtyImmuneObjects();
+      if (kIsDebugBuild) {
+        // Check that all non-gray immune objects only reference immune objects.
+        cc->VerifyGrayImmuneObjects();
+      }
+    }
   }
 
  private:
   ConcurrentCopying* const concurrent_copying_;
 };
 
+class ConcurrentCopying::VerifyGrayImmuneObjectsVisitor {
+ public:
+  explicit VerifyGrayImmuneObjectsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+      const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
+    CheckReference(obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset),
+                   obj, offset);
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    CheckReference(ref->GetReferent<kWithoutReadBarrier>(),
+                   ref,
+                   mirror::Reference::ReferentOffset());
+  }
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CheckReference(root->AsMirrorPtr(), nullptr, MemberOffset(0));
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+
+  void CheckReference(mirror::Object* ref, mirror::Object* holder, MemberOffset offset) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (ref != nullptr) {
+      if (!collector_->immune_spaces_.ContainsObject(ref)) {
+        // Not immune, must be a zygote large object.
+        CHECK(Runtime::Current()->GetHeap()->GetLargeObjectsSpace()->IsZygoteLargeObject(
+            Thread::Current(), ref))
+            << "Non gray object references non immune, non zygote large object "<< ref << " "
+            << PrettyTypeOf(ref) << " in holder " << holder << " " << PrettyTypeOf(holder)
+            << " offset=" << offset.Uint32Value();
+      } else {
+        // Make sure the large object class is immune since we will never scan the large object.
+        CHECK(collector_->immune_spaces_.ContainsObject(
+            ref->GetClass<kVerifyNone, kWithoutReadBarrier>()));
+      }
+    }
+  }
+};
+
+void ConcurrentCopying::VerifyGrayImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    VerifyGrayImmuneObjectsVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  [&visitor](mirror::Object* obj)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      // If an object is not gray, it should only have references to things in the immune spaces.
+      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+        obj->VisitReferences</*kVisitNativeRoots*/true,
+                             kDefaultVerifyFlags,
+                             kWithoutReadBarrier>(visitor, visitor);
+      }
+    });
+  }
+}
+
 // Switch the thread roots from from-space refs to to-space refs. Forward/mark the thread roots.
 void ConcurrentCopying::FlipThreadRoots() {
   TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
@@ -270,10 +435,8 @@
   gc_barrier_->Init(self, 0);
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
-  heap_->ThreadFlipBegin(self);  // Sync with JNI critical calls.
   size_t barrier_count = Runtime::Current()->FlipThreadRoots(
       &thread_flip_visitor, &flip_callback, this);
-  heap_->ThreadFlipEnd(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
     gc_barrier_->Increment(self, barrier_count);
@@ -287,6 +450,52 @@
   }
 }
 
+class ConcurrentCopying::GrayImmuneObjectVisitor {
+ public:
+  explicit GrayImmuneObjectVisitor() {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier) {
+      if (kIsDebugBuild) {
+        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+      }
+      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
+  }
+};
+
+void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  accounting::CardTable* const card_table = heap->GetCardTable();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    GrayImmuneObjectVisitor visitor;
+    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+    // Mark all the objects on dirty cards since these may point to objects in other spaces.
+    // Once these are marked, the GC will clear them later.
+    // The table is non-null for the boot image and zygote spaces. It is only null for application
+    // image spaces.
+    if (table != nullptr) {
+      // TODO: Add preclean outside the pause.
+      table->ClearCards();
+      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+    } else {
+      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
+      // pause because app image spaces are all dirty pages anyway.
+      card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
+    }
+  }
+  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
+  // barriers in the immune spaces.
+  updated_all_immune_objects_.StoreRelaxed(true);
+}
+
 void ConcurrentCopying::SwapStacks() {
   heap_->SwapStacks();
 }
@@ -296,40 +505,6 @@
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
 }
 
-// Used to visit objects in the immune spaces.
-class ConcurrentCopying::ImmuneSpaceObjVisitor {
- public:
-  explicit ImmuneSpaceObjVisitor(ConcurrentCopying* cc) : collector_(cc) {}
-
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->immune_spaces_.ContainsObject(obj));
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-    DCHECK(cc_bitmap != nullptr)
-        << "An immune space object must have a bitmap";
-    if (kIsDebugBuild) {
-      DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
-          << "Immune space object must be already marked";
-    }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      obj->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(obj)) {
-      // Already marked. Do nothing.
-    } else {
-      // Newly marked. Set the gray bit and push it onto the mark stack.
-      CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      collector_->PushOntoMarkStack(obj);
-    }
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
 class EmptyCheckpoint : public Closure {
  public:
   explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
@@ -350,6 +525,41 @@
   ConcurrentCopying* const concurrent_copying_;
 };
 
+// Used to visit objects in the immune spaces.
+inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) {
+  DCHECK(obj != nullptr);
+  DCHECK(immune_spaces_.ContainsObject(obj));
+  // Update the fields without graying it or pushing it onto the mark stack.
+  Scan(obj);
+}
+
+class ConcurrentCopying::ImmuneSpaceScanObjVisitor {
+ public:
+  explicit ImmuneSpaceScanObjVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+        collector_->ScanImmuneObject(obj);
+        // Done scanning the object, go back to white.
+        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                        ReadBarrier::WhitePtr());
+        CHECK(success);
+      }
+    } else {
+      collector_->ScanImmuneObject(obj);
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<ImmuneSpaceScanObjVisitor*>(arg)->operator()(obj);
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
 // Concurrently mark roots that are guarded by read barriers and process the mark stack.
 void ConcurrentCopying::MarkingPhase() {
   TimingLogger::ScopedTiming split("MarkingPhase", GetTimings());
@@ -357,25 +567,54 @@
     LOG(INFO) << "GC MarkingPhase";
   }
   CHECK(weak_ref_access_enabled_);
+
+  // Scan immune spaces.
+  // Update all the fields in the immune spaces first without graying the objects so that we
+  // minimize dirty pages in the immune spaces. Note mutators can concurrently access and gray some
+  // of the objects.
+  if (kUseBakerReadBarrier) {
+    gc_grays_immune_objects_ = false;
+  }
   {
-    // Mark the image root. The WB-based collectors do not need to
-    // scan the image objects from roots by relying on the card table,
-    // but it's necessary for the RB to-space invariant to hold.
-    TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings());
-    for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
-      if (space->IsImageSpace()) {
-        gc::space::ImageSpace* image = space->AsImageSpace();
-        if (image != nullptr) {
-          mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
-          mirror::Object* marked_image_root = Mark(image_root);
-          CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
-          if (ReadBarrier::kEnableToSpaceInvariantChecks) {
-            AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
-          }
-        }
+    TimingLogger::ScopedTiming split2("ScanImmuneSpaces", GetTimings());
+    for (auto& space : immune_spaces_.GetSpaces()) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      ImmuneSpaceScanObjVisitor visitor(this);
+      if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) {
+        table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor);
+      } else {
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->Limit()),
+                                      visitor);
       }
     }
   }
+  if (kUseBakerReadBarrier) {
+    // This release fence makes the field updates in the above loop visible before allowing mutators
+    // to access immune objects without graying them first.
+    updated_all_immune_objects_.StoreRelease(true);
+    // Now whiten immune objects concurrently accessed and grayed by mutators. We can't do this in
+    // the above loop because we would incorrectly disable the read barrier by whitening an object
+    // which may point to an unscanned, white object, breaking the to-space invariant.
+    //
+    // Make sure no mutators are in the middle of marking an immune object before whitening immune
+    // objects.
+    IssueEmptyCheckpoint();
+    MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+    if (kVerboseMode) {
+      LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
+    }
+    for (mirror::Object* obj : immune_gray_stack_) {
+      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+    immune_gray_stack_.clear();
+  }
+
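
The release store of updated_all_immune_objects_ above publishes all of the immune-space field updates made in the scan loop: a reader that observes the flag as true is also guaranteed to see those updates. A minimal sketch of that publication pattern using std::atomic (ART uses its own Atomic<> wrappers, and some of its loads are relaxed because other synchronization covers them) is:

#include <atomic>
#include <cassert>

struct ImmuneState {
  int updated_field = 0;                 // Stands in for the immune-object field updates.
  std::atomic<bool> updated_all{false};  // Stands in for updated_all_immune_objects_.
};

void GcThreadPublish(ImmuneState* state) {
  state->updated_field = 42;                                   // Plain writes done first...
  state->updated_all.store(true, std::memory_order_release);   // ...then published.
}

bool MutatorFastPath(const ImmuneState* state) {
  if (state->updated_all.load(std::memory_order_acquire)) {
    // All field updates made before the release store are visible here.
    assert(state->updated_field == 42);
    return true;  // No need to gray the immune object.
  }
  return false;   // Fall back to the slow path that grays the object.
}
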
   {
     TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings());
     Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
@@ -386,16 +625,6 @@
     Runtime::Current()->VisitNonThreadRoots(this);
   }
 
-  // Immune spaces.
-  for (auto& space : immune_spaces_.GetSpaces()) {
-    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    ImmuneSpaceObjVisitor visitor(this);
-    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-
   Thread* self = Thread::Current();
   {
     TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings());
@@ -458,6 +687,9 @@
     Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
     // Marking is done. Disable marking.
     DisableMarking();
+    if (kUseBakerReadBarrier) {
+      ProcessFalseGrayStack();
+    }
     CheckEmptyMarkStack();
   }
 
@@ -547,6 +779,32 @@
   mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
 }
 
+void ConcurrentCopying::PushOntoFalseGrayStack(mirror::Object* ref) {
+  CHECK(kUseBakerReadBarrier);
+  DCHECK(ref != nullptr);
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  false_gray_stack_.push_back(ref);
+}
+
+void ConcurrentCopying::ProcessFalseGrayStack() {
+  CHECK(kUseBakerReadBarrier);
+  // Change the objects on the false gray stack from gray to white.
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  for (mirror::Object* obj : false_gray_stack_) {
+    DCHECK(IsMarked(obj));
+    // The object could be white here if a thread got preempted after a success at the
+    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // still gray), and the thread ran to register it onto the false gray stack.
+    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+  }
+  false_gray_stack_.clear();
+}
+
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
   EmptyCheckpoint check_point(this);
@@ -654,8 +912,8 @@
   return heap_->live_stack_.get();
 }
 
-// The following visitors are that used to verify that there's no
-// references to the from-space left after marking.
+// The following visitors are used to verify that there are no references to the from-space left
+// after marking.
 class ConcurrentCopying::VerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit VerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
@@ -669,20 +927,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      if (collector_->RegionSpace()->IsInToSpace(ref)) {
-        CHECK(ref->GetReadBarrierPointer() == nullptr)
-            << "To-space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
-      } else {
-        CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector_->IsOnAllocStack(ref)))
-            << "Non-moving/unevac from space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-black rb_ptr " << ref->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr)."
-            << " Is it in the non-moving space="
-            << (collector_->GetHeap()->GetNonMovingSpace()->HasAddress(ref));
-      }
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+          << "Ref " << ref << " " << PrettyTypeOf(ref)
+          << " has non-white rb_ptr ";
     }
   }
 
@@ -748,18 +995,8 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      if (collector->RegionSpace()->IsInToSpace(obj)) {
-        CHECK(obj->GetReadBarrierPointer() == nullptr)
-            << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
-      } else {
-        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector->IsOnAllocStack(obj)))
-            << "Non-moving space/unevac from space ref " << obj << " " << PrettyTypeOf(obj)
-            << " has non-black rb_ptr " << obj->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr). Is it in the non-moving space="
-            << (collector->GetHeap()->GetNonMovingSpace()->HasAddress(obj));
-      }
+      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
 
@@ -1067,7 +1304,6 @@
   }
   // Scan ref fields.
   Scan(to_ref);
-  // Mark the gray ref as white or black.
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
@@ -1077,41 +1313,34 @@
   if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
                 to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
-    // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
-    // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
+    // Leave this reference gray in the queue so that GetReferent() will trigger a read barrier. We
+    // will change it to white later in ReferenceQueue::DequeuePendingReference().
     DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
   } else {
-    // We may occasionally leave a Reference black or white in the queue if its referent happens to
-    // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
-    // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
-    // here in this else block.
+    // We may occasionally leave a reference white in the queue if its referent happens to be
+    // concurrently marked after the Scan() call above has enqueued the Reference, in which case the
+    // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
+    // else block.
     if (kUseBakerReadBarrier) {
-      if (region_space_->IsInToSpace(to_ref)) {
-        // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::WhitePtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-      } else {
-        // If non-moving space/unevac from space, change from gray
-        // to black. We can't change gray to white because it's not
-        // safe to use CAS if two threads change values in opposite
-        // directions (A->B and B->A). So, we change it to black to
-        // indicate non-moving objects that have been marked
-        // through. Note we'd need to change from black to white
-        // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::BlackPtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
-      }
+      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+          ReadBarrier::GrayPtr(),
+          ReadBarrier::WhitePtr());
+      DCHECK(success) << "Must succeed as we won the race.";
     }
   }
 #else
   DCHECK(!kUseBakerReadBarrier);
 #endif
+
+  if (region_space_->IsInUnevacFromSpace(to_ref)) {
+    // Add to the live bytes per unevacuated from space. Note this code is always run by the
+    // GC-running thread (no synchronization required).
+    DCHECK(region_space_bitmap_->Test(to_ref));
+    // Disable the read barrier in SizeOf for performance, which is safe.
+    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
+    region_space_->AddLiveBytes(to_ref, alloc_size);
+  }
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
     AssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
@@ -1219,65 +1448,31 @@
   SweepLargeObjects(swap_bitmaps);
 }
 
+void ConcurrentCopying::MarkZygoteLargeObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  Thread* const self = Thread::Current();
+  WriterMutexLock rmu(self, *Locks::heap_bitmap_lock_);
+  space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
+  // Pick the current live bitmap (mark bitmap if swapped).
+  accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
+  accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
+  // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(los->Begin()),
+                                reinterpret_cast<uintptr_t>(los->End()),
+                                [mark_bitmap, los, self](mirror::Object* obj)
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (los->IsZygoteLargeObject(self, obj)) {
+      mark_bitmap->Set(obj);
+    }
+  });
+}
+
 void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) {
   TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings());
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
-class ConcurrentCopying::ClearBlackPtrsVisitor {
- public:
-  explicit ClearBlackPtrsVisitor(ConcurrentCopying* cc) : collector_(cc) {}
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Clear the black ptrs in non-moving objects back to white.
-void ConcurrentCopying::ClearBlackPtrs() {
-  CHECK(kUseBakerReadBarrier);
-  TimingLogger::ScopedTiming split("ClearBlackPtrs", GetTimings());
-  ClearBlackPtrsVisitor visitor(this);
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (space == region_space_) {
-      continue;
-    }
-    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    if (kVerboseMode) {
-      LOG(INFO) << "ClearBlackPtrs: " << *space << " bitmap: " << *mark_bitmap;
-    }
-    mark_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-  space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
-  large_object_space->GetMarkBitmap()->VisitMarkedRange(
-      reinterpret_cast<uintptr_t>(large_object_space->Begin()),
-      reinterpret_cast<uintptr_t>(large_object_space->End()),
-      visitor);
-  // Objects on the allocation stack?
-  if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
-    size_t count = GetAllocationStack()->Size();
-    auto* it = GetAllocationStack()->Begin();
-    auto* end = GetAllocationStack()->End();
-    for (size_t i = 0; i < count; ++i, ++it) {
-      CHECK_LT(it, end);
-      mirror::Object* obj = it->AsMirrorPtr();
-      if (obj != nullptr) {
-        // Must have been cleared above.
-        CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-      }
-    }
-  }
-}
-
 void ConcurrentCopying::ReclaimPhase() {
   TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
   if (kVerboseMode) {
@@ -1296,6 +1491,9 @@
     IssueEmptyCheckpoint();
     // Disable the check.
     is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0);
+    if (kUseBakerReadBarrier) {
+      updated_all_immune_objects_.StoreSequentiallyConsistent(false);
+    }
     CheckEmptyMarkStack();
   }
 
@@ -1308,7 +1506,9 @@
     uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
     uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
     uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
+    cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
     uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
+    cumulative_objects_moved_.FetchAndAddRelaxed(to_objects);
     if (kEnableFromSpaceAccountingCheck) {
       CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
@@ -1335,31 +1535,19 @@
   }
 
   {
-    TimingLogger::ScopedTiming split3("ComputeUnevacFromSpaceLiveRatio", GetTimings());
-    ComputeUnevacFromSpaceLiveRatio();
-  }
-
-  {
     TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
     region_space_->ClearFromSpace();
   }
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (kUseBakerReadBarrier) {
-      ClearBlackPtrs();
-    }
     Sweep(false);
     SwapBitmaps();
     heap_->UnBindBitmaps();
 
-    // Remove bitmaps for the immune spaces.
-    while (!cc_bitmaps_.empty()) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap = cc_bitmaps_.back();
-      cc_heap_bitmap_->RemoveContinuousSpaceBitmap(cc_bitmap);
-      delete cc_bitmap;
-      cc_bitmaps_.pop_back();
-    }
+    // Delete the region bitmap.
+    DCHECK(region_space_bitmap_ != nullptr);
+    delete region_space_bitmap_;
     region_space_bitmap_ = nullptr;
   }
 
@@ -1370,39 +1558,6 @@
   }
 }
 
-class ConcurrentCopying::ComputeUnevacFromSpaceLiveRatioVisitor {
- public:
-  explicit ComputeUnevacFromSpaceLiveRatioVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(ref != nullptr);
-    DCHECK(collector_->region_space_bitmap_->Test(ref)) << ref;
-    DCHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref;
-    if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
-      // Clear the black ptr.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
-    }
-    size_t obj_size = ref->SizeOf();
-    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
-    collector_->region_space_->AddLiveBytes(ref, alloc_size);
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Compute how much live objects are left in regions.
-void ConcurrentCopying::ComputeUnevacFromSpaceLiveRatio() {
-  region_space_->AssertAllRegionLiveBytesZeroOrCleared();
-  ComputeUnevacFromSpaceLiveRatioVisitor visitor(this);
-  region_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(region_space_->Begin()),
-                                         reinterpret_cast<uintptr_t>(region_space_->Limit()),
-                                         visitor);
-}
-
 // Assert the to-space invariant.
 void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
                                                mirror::Object* ref) {
@@ -1473,7 +1628,7 @@
         ArtMethod* method = gc_root_source->GetArtMethod();
         LOG(INTERNAL_FATAL) << "gc root in method " << method << " " << PrettyMethod(method);
         RootPrinter root_printer;
-        method->VisitRoots(root_printer, sizeof(void*));
+        method->VisitRoots(root_printer, kRuntimePointerSize);
       }
       ref->GetLockWord(false).Dump(LOG(INTERNAL_FATAL));
       region_space_->DumpNonFreeRegions(LOG(INTERNAL_FATAL));
@@ -1508,15 +1663,6 @@
     // In a non-moving space.
     if (immune_spaces_.ContainsObject(obj)) {
       LOG(INFO) << "holder is in an immune image or the zygote space.";
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-      CHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap.";
-      if (cc_bitmap->Test(obj)) {
-        LOG(INFO) << "holder is marked in the bit map.";
-      } else {
-        LOG(INFO) << "holder is NOT marked in the bit map.";
-      }
     } else {
       LOG(INFO) << "holder is in a non-immune, non-moving (or main) space.";
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1547,17 +1693,17 @@
                                                                mirror::Object* ref) {
   // In a non-moving spaces. Check that the ref is marked.
   if (immune_spaces_.ContainsObject(ref)) {
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
-    CHECK(cc_bitmap != nullptr)
-        << "An immune space ref must have a bitmap. " << ref;
     if (kUseBakerReadBarrier) {
-      CHECK(cc_bitmap->Test(ref))
+      // Immune object may not be gray if called from the GC.
+      if (Thread::Current() == thread_running_gc_ && !gc_grays_immune_objects_) {
+        return;
+      }
+      bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
           << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << obj->GetReadBarrierPointer() << " ref=" << ref;
-    } else {
-      CHECK(cc_bitmap->Test(ref))
-          << "Unmarked immune space ref. obj=" << obj << " ref=" << ref;
+          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
+          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+          << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
     accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1608,7 +1754,7 @@
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
       ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    collector_->MarkRoot(root);
+    collector_->MarkRoot</*kGrayImmuneObject*/false>(root);
   }
 
  private:
@@ -1617,18 +1763,27 @@
 
 // Scan ref fields of an object.
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+  if (kDisallowReadBarrierDuringScan) {
+    // Avoid all read barriers while visiting references to help performance.
+    Thread::Current()->ModifyDebugDisallowReadBarrier(1);
+  }
   DCHECK(!region_space_->IsInFromSpace(to_ref));
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
   RefFieldsVisitor visitor(this);
   // Disable the read barrier for a performance reason.
   to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
       visitor, visitor);
+  if (kDisallowReadBarrierDuringScan) {
+    Thread::Current()->ModifyDebugDisallowReadBarrier(-1);
+  }
 }
 
 // Process a field.
 inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false>(ref);
   if (to_ref == ref) {
     return;
   }
@@ -1667,10 +1822,11 @@
   }
 }
 
+template<bool kGrayImmuneObject>
 inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
   DCHECK(!root->IsNull());
   mirror::Object* const ref = root->AsMirrorPtr();
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark<kGrayImmuneObject>(ref);
   if (to_ref != ref) {
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
     auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
@@ -1691,39 +1847,75 @@
   for (size_t i = 0; i < count; ++i) {
     mirror::CompressedReference<mirror::Object>* const root = roots[i];
     if (!root->IsNull()) {
-      MarkRoot(root);
+      // kGrayImmuneObject is true because this is used for the thread flip.
+      MarkRoot</*kGrayImmuneObject*/true>(root);
     }
   }
 }
 
+// Temporarily set gc_grays_immune_objects_ to true in a scope if the current thread is the GC.
+class ConcurrentCopying::ScopedGcGraysImmuneObjects {
+ public:
+  explicit ScopedGcGraysImmuneObjects(ConcurrentCopying* collector)
+      : collector_(collector), enabled_(false) {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        !collector_->gc_grays_immune_objects_) {
+      collector_->gc_grays_immune_objects_ = true;
+      enabled_ = true;
+    }
+  }
+
+  ~ScopedGcGraysImmuneObjects() {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        enabled_) {
+      DCHECK(collector_->gc_grays_immune_objects_);
+      collector_->gc_grays_immune_objects_ = false;
+    }
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+  bool enabled_;
+};
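
ScopedGcGraysImmuneObjects is a conditional scope guard: only the instance that actually flipped the flag restores it, and restoration happens on every exit path. A generic, self-contained sketch of the same shape, with hypothetical names, is:

// A small RAII guard that conditionally sets a flag and restores it when the
// scope ends; mirrors the shape of the scoped object above.
class ScopedFlag {
 public:
  explicit ScopedFlag(bool* flag) : flag_(flag), enabled_(false) {
    if (!*flag_) {
      *flag_ = true;
      enabled_ = true;  // Only the guard that actually set the flag clears it.
    }
  }

  ~ScopedFlag() {
    if (enabled_) {
      *flag_ = false;
    }
  }

  // Non-copyable: exactly one owner restores the flag.
  ScopedFlag(const ScopedFlag&) = delete;
  ScopedFlag& operator=(const ScopedFlag&) = delete;

 private:
  bool* const flag_;
  bool enabled_;
};

A caller then just writes ScopedFlag scope(&grays_immune); at the top of the function and the flag is restored on every return path, including early returns.
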
+
 // Fill the given memory block with a dummy object. Used to fill in a
 // copy of objects that was lost in race.
 void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) {
+  // GC doesn't gray immune objects while scanning immune objects. But we need to trigger the read
+  // barriers here because we need the updated reference to the int array class, etc. Temporarily
+  // set gc_grays_immune_objects_ to true so that we won't cause a DCHECK failure in
+  // MarkImmuneSpace().
+  ScopedGcGraysImmuneObjects scoped_gc_gray_immune_objects(this);
   CHECK_ALIGNED(byte_size, kObjectAlignment);
   memset(dummy_obj, 0, byte_size);
-  mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
+  // Avoid going through the read barrier since kDisallowReadBarrierDuringScan may be enabled.
+  // Explicitly mark the class to make sure we get an object in the to-space.
+  mirror::Class* int_array_class = down_cast<mirror::Class*>(
+      Mark(mirror::IntArray::GetArrayClass<kWithoutReadBarrier>()));
   CHECK(int_array_class != nullptr);
   AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class);
-  size_t component_size = int_array_class->GetComponentSize();
+  size_t component_size = int_array_class->GetComponentSize<kWithoutReadBarrier>();
   CHECK_EQ(component_size, sizeof(int32_t));
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
     // An int array is too big. Use java.lang.Object.
     mirror::Class* java_lang_Object = WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
     AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object);
-    CHECK_EQ(byte_size, java_lang_Object->GetObjectSize());
+    CHECK_EQ(byte_size, (java_lang_Object->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
     dummy_obj->SetClass(java_lang_Object);
-    CHECK_EQ(byte_size, dummy_obj->SizeOf());
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()));
   } else {
     // Use an int array.
     dummy_obj->SetClass(int_array_class);
-    CHECK(dummy_obj->IsArrayInstance());
+    CHECK((dummy_obj->IsArrayInstance<kVerifyNone, kWithoutReadBarrier>()));
     int32_t length = (byte_size - data_offset) / component_size;
-    dummy_obj->AsArray()->SetLength(length);
-    CHECK_EQ(dummy_obj->AsArray()->GetLength(), length)
+    mirror::Array* dummy_arr = dummy_obj->AsArray<kVerifyNone, kWithoutReadBarrier>();
+    dummy_arr->SetLength(length);
+    CHECK_EQ(dummy_arr->GetLength(), length)
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
-    CHECK_EQ(byte_size, dummy_obj->SizeOf())
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()))
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
   }
@@ -1735,14 +1927,16 @@
   CHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
   Thread* self = Thread::Current();
   size_t min_object_size = RoundUp(sizeof(mirror::Object), space::RegionSpace::kAlignment);
-  MutexLock mu(self, skipped_blocks_lock_);
-  auto it = skipped_blocks_map_.lower_bound(alloc_size);
-  if (it == skipped_blocks_map_.end()) {
-    // Not found.
-    return nullptr;
-  }
+  size_t byte_size;
+  uint8_t* addr;
   {
-    size_t byte_size = it->first;
+    MutexLock mu(self, skipped_blocks_lock_);
+    auto it = skipped_blocks_map_.lower_bound(alloc_size);
+    if (it == skipped_blocks_map_.end()) {
+      // Not found.
+      return nullptr;
+    }
+    byte_size = it->first;
     CHECK_GE(byte_size, alloc_size);
     if (byte_size > alloc_size && byte_size - alloc_size < min_object_size) {
       // If remainder would be too small for a dummy object, retry with a larger request size.
@@ -1755,27 +1949,33 @@
       CHECK_GE(it->first - alloc_size, min_object_size)
           << "byte_size=" << byte_size << " it->first=" << it->first << " alloc_size=" << alloc_size;
     }
+    // Found a block.
+    CHECK(it != skipped_blocks_map_.end());
+    byte_size = it->first;
+    addr = it->second;
+    CHECK_GE(byte_size, alloc_size);
+    CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
+    CHECK_ALIGNED(byte_size, space::RegionSpace::kAlignment);
+    if (kVerboseMode) {
+      LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
+    }
+    skipped_blocks_map_.erase(it);
   }
-  // Found a block.
-  CHECK(it != skipped_blocks_map_.end());
-  size_t byte_size = it->first;
-  uint8_t* addr = it->second;
-  CHECK_GE(byte_size, alloc_size);
-  CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
-  CHECK_ALIGNED(byte_size, space::RegionSpace::kAlignment);
-  if (kVerboseMode) {
-    LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
-  }
-  skipped_blocks_map_.erase(it);
   memset(addr, 0, byte_size);
   if (byte_size > alloc_size) {
     // Return the remainder to the map.
     CHECK_ALIGNED(byte_size - alloc_size, space::RegionSpace::kAlignment);
     CHECK_GE(byte_size - alloc_size, min_object_size);
+    // FillWithDummyObject may mark an object, so avoid holding skipped_blocks_lock_ to prevent a
+    // lock violation and possible deadlock. The deadlock is a recursive one:
+    // FillWithDummyObject -> IntArray::GetArrayClass -> Mark -> Copy -> AllocateInSkippedBlock.
     FillWithDummyObject(reinterpret_cast<mirror::Object*>(addr + alloc_size),
                         byte_size - alloc_size);
     CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr + alloc_size)));
-    skipped_blocks_map_.insert(std::make_pair(byte_size - alloc_size, addr + alloc_size));
+    {
+      MutexLock mu(self, skipped_blocks_lock_);
+      skipped_blocks_map_.insert(std::make_pair(byte_size - alloc_size, addr + alloc_size));
+    }
   }
   return reinterpret_cast<mirror::Object*>(addr);
 }
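
The restructuring above narrows the critical section so that FillWithDummyObject never runs with skipped_blocks_lock_ held, breaking the recursive path FillWithDummyObject -> Mark -> Copy -> AllocateInSkippedBlock. A stripped-down sketch of that lock-scope pattern, using std::mutex and hypothetical names rather than ART's lock machinery, is:

#include <cstdint>
#include <cstring>
#include <map>
#include <mutex>

std::mutex skipped_blocks_lock;                  // Hypothetical stand-in for skipped_blocks_lock_.
std::multimap<size_t, uint8_t*> skipped_blocks;  // size -> address of a reusable block.

// May recurse back into AllocateFromSkipped() in the real collector, so it
// must never run while skipped_blocks_lock is held.
void FillWithDummy(uint8_t* addr, size_t size) {
  std::memset(addr, 0, size);
}

uint8_t* AllocateFromSkipped(size_t alloc_size) {
  size_t byte_size = 0;
  uint8_t* addr = nullptr;
  {
    std::lock_guard<std::mutex> lock(skipped_blocks_lock);
    auto it = skipped_blocks.lower_bound(alloc_size);
    if (it == skipped_blocks.end()) {
      return nullptr;  // No block large enough.
    }
    byte_size = it->first;
    addr = it->second;
    skipped_blocks.erase(it);
  }  // Lock dropped before any re-entrant call.
  if (byte_size > alloc_size) {
    // Fill the tail without the lock, then re-acquire it only to return the remainder.
    FillWithDummy(addr + alloc_size, byte_size - alloc_size);
    std::lock_guard<std::mutex> lock(skipped_blocks_lock);
    skipped_blocks.emplace(byte_size - alloc_size, addr + alloc_size);
  }
  return addr;
}
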
@@ -1934,21 +2134,8 @@
   } else {
     // from_ref is in a non-moving space.
     if (immune_spaces_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      if (cc_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-      } else {
-        // Newly marked.
-        to_ref = nullptr;
-      }
+      // An immune object is alive.
+      to_ref = from_ref;
     } else {
       // Non-immune non-moving space. Use the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1987,78 +2174,74 @@
 mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
   // ref is in a non-moving space (from_ref == to_ref).
   DCHECK(!region_space_->HasAddress(ref)) << ref;
-  if (immune_spaces_.ContainsObject(ref)) {
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
-    DCHECK(cc_bitmap != nullptr)
-        << "An immune space object must have a bitmap";
-    if (kIsDebugBuild) {
-      DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
-          << "Immune space object must be already marked";
-    }
-    // This may or may not succeed, which is ok.
+  DCHECK(!immune_spaces_.ContainsObject(ref));
+  // Use the mark bitmap.
+  accounting::ContinuousSpaceBitmap* mark_bitmap =
+      heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+  accounting::LargeObjectBitmap* los_bitmap =
+      heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+  CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+  bool is_los = mark_bitmap == nullptr;
+  if (!is_los && mark_bitmap->Test(ref)) {
+    // Already marked.
     if (kUseBakerReadBarrier) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
     }
-    if (cc_bitmap->AtomicTestAndSet(ref)) {
-      // Already marked.
-    } else {
-      // Newly marked.
-      if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-      }
-      PushOntoMarkStack(ref);
+  } else if (is_los && los_bitmap->Test(ref)) {
+    // Already marked in LOS.
+    if (kUseBakerReadBarrier) {
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
     }
   } else {
-    // Use the mark bitmap.
-    accounting::ContinuousSpaceBitmap* mark_bitmap =
-        heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
-    accounting::LargeObjectBitmap* los_bitmap =
-        heap_mark_bitmap_->GetLargeObjectBitmap(ref);
-    CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
-    bool is_los = mark_bitmap == nullptr;
-    if (!is_los && mark_bitmap->Test(ref)) {
-      // Already marked.
-      if (kUseBakerReadBarrier) {
-        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+    // Not marked.
+    if (IsOnAllocStack(ref)) {
+      // If it's on the allocation stack, it's considered marked. Keep it white.
+      // Objects on the allocation stack need not be marked.
+      if (!is_los) {
+        DCHECK(!mark_bitmap->Test(ref));
+      } else {
+        DCHECK(!los_bitmap->Test(ref));
       }
-    } else if (is_los && los_bitmap->Test(ref)) {
-      // Already marked in LOS.
       if (kUseBakerReadBarrier) {
-        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
       }
     } else {
-      // Not marked.
-      if (IsOnAllocStack(ref)) {
-        // If it's on the allocation stack, it's considered marked. Keep it white.
-        // Objects on the allocation stack need not be marked.
-        if (!is_los) {
-          DCHECK(!mark_bitmap->Test(ref));
-        } else {
-          DCHECK(!los_bitmap->Test(ref));
+      // For the baker-style RB, we need to handle 'false-gray' cases. See the
+      // kRegionTypeUnevacFromSpace-case comment in Mark().
+      if (kUseBakerReadBarrier) {
+        // Test the bitmap first to reduce the chance of false gray cases.
+        if ((!is_los && mark_bitmap->Test(ref)) ||
+            (is_los && los_bitmap->Test(ref))) {
+          return ref;
         }
-        if (kUseBakerReadBarrier) {
-          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+      }
+      // Not marked or on the allocation stack. Try to mark it.
+      // This may or may not succeed, which is ok.
+      bool cas_success = false;
+      if (kUseBakerReadBarrier) {
+        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                       ReadBarrier::GrayPtr());
+      }
+      if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
+        // Already marked.
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
+        }
+      } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
+        // Already marked in LOS.
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
         }
       } else {
-        // Not marked or on the allocation stack. Try to mark it.
-        // This may or may not succeed, which is ok.
+        // Newly marked.
         if (kUseBakerReadBarrier) {
-          ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
         }
-        if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
-          // Already marked.
-        } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
-          // Already marked in LOS.
-        } else {
-          // Newly marked.
-          if (kUseBakerReadBarrier) {
-            DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-          }
-          PushOntoMarkStack(ref);
-        }
+        PushOntoMarkStack(ref);
       }
     }
   }
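
The rewritten MarkNonMoving path orders the white-to-gray CAS before the bitmap test-and-set, so a thread can gray an object and then find that another thread already marked it; such objects go onto the false-gray stack to be whitened later. A reduced sketch of that race handling, with RbColor and a single atomic flag standing in for the read barrier pointer and the mark bitmap:

    // Sketch only: illustrative types, not the ART declarations.
    #include <atomic>
    #include <vector>

    enum class RbColor { kWhite, kGray };

    struct Obj {
      std::atomic<RbColor> rb_ptr{RbColor::kWhite};
      bool CasWhiteToGray() {
        RbColor expected = RbColor::kWhite;
        return rb_ptr.compare_exchange_strong(expected, RbColor::kGray);
      }
    };

    // Returns true if this call marked the object for the first time.
    bool MarkNonMovingSketch(Obj* ref,
                             std::atomic<bool>& mark_bit,        // stand-in for AtomicTestAndSet
                             std::vector<Obj*>& false_gray_stack) {
      bool cas_success = ref->CasWhiteToGray();
      if (mark_bit.exchange(true)) {
        // Lost the bitmap race: another thread already marked the object. If our CAS grayed it,
        // record it so the GC can set it back to white later (the "false gray" case).
        if (cas_success && ref->rb_ptr.load() == RbColor::kGray) {
          false_gray_stack.push_back(ref);
        }
        return false;
      }
      return true;  // newly marked; the caller pushes it onto the mark stack
    }

The key property is that a thread which loses the marking race never leaves the object gray without recording it somewhere the GC will revisit.
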
@@ -2076,9 +2259,41 @@
     MutexLock mu(Thread::Current(), skipped_blocks_lock_);
     skipped_blocks_map_.clear();
   }
-  ReaderMutexLock mu(self, *Locks::mutator_lock_);
-  WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
-  heap_->ClearMarkedObjects();
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    {
+      WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      heap_->ClearMarkedObjects();
+    }
+    if (kUseBakerReadBarrier && kFilterModUnionCards) {
+      TimingLogger::ScopedTiming split("FilterModUnionCards", GetTimings());
+      ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      gc::Heap* const heap = Runtime::Current()->GetHeap();
+      for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+        DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+        accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+        // Filter out cards that don't need to be set.
+        if (table != nullptr) {
+          table->FilterCards();
+        }
+      }
+    }
+    if (kUseBakerReadBarrier) {
+      TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+      DCHECK(rb_mark_bit_stack_.get() != nullptr);
+      const auto* limit = rb_mark_bit_stack_->End();
+      for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+        CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+      }
+      rb_mark_bit_stack_->Reset();
+    }
+  }
+  if (measure_read_barrier_slow_path_) {
+    MutexLock mu(self, rb_slow_path_histogram_lock_);
+    rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
+    rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
+    rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+  }
 }
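
FinishPhase now also drains rb_mark_bit_stack_: every object whose mark bit was set through the read barrier during this GC has the bit cleared again, and the CHECK asserts the bit was still set. A small sketch of that drain, with ObjSketch standing in for mirror::Object and its AtomicSetMarkBit:

    // Sketch only: a single atomic word stands in for the object's mark bit.
    #include <atomic>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct ObjSketch {
      std::atomic<uint32_t> mark_bit{0};
      // CAS the mark bit from `expected` to `desired`, mirroring AtomicSetMarkBit above.
      bool AtomicSetMarkBit(uint32_t expected, uint32_t desired) {
        return mark_bit.compare_exchange_strong(expected, desired);
      }
    };

    void EmptyRbMarkBitStack(std::vector<ObjSketch*>& stack) {
      for (ObjSketch* obj : stack) {
        // Every object recorded during the GC must still have its mark bit set; clear it now.
        bool cleared = obj->AtomicSetMarkBit(1u, 0u);
        assert(cleared);
        (void)cleared;
      }
      stack.clear();
    }
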
 
 bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
@@ -2116,6 +2331,39 @@
   region_space_->RevokeAllThreadLocalBuffers();
 }
 
+mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+  if (Thread::Current() != thread_running_gc_) {
+    rb_slow_path_count_.FetchAndAddRelaxed(1u);
+  } else {
+    rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+  }
+  ScopedTrace tr(__FUNCTION__);
+  const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+  mirror::Object* ret = Mark(from_ref);
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+  }
+  return ret;
+}
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+  GarbageCollector::DumpPerformanceInfo(os);
+  MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+  if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+    Histogram<uint64_t>::CumulativeData cumulative_data;
+    rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+    rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+  }
+  if (rb_slow_path_count_total_ > 0) {
+    os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+  }
+  if (rb_slow_path_count_gc_total_ > 0) {
+    os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+  }
+  os << "Cumulative bytes moved " << cumulative_bytes_moved_.LoadRelaxed() << "\n";
+  os << "Cumulative objects moved " << cumulative_objects_moved_.LoadRelaxed() << "\n";
+}
+
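
MarkFromReadBarrierWithMeasurements wraps the normal Mark call in relaxed counter updates plus an optional NanoTime() measurement that later feeds the histogram printed by DumpPerformanceInfo. A condensed sketch of the wrapper, using std::chrono in place of NanoTime() and illustrative names throughout:

    #include <atomic>
    #include <chrono>
    #include <cstdint>

    struct RbCounters {
      std::atomic<uint64_t> slow_path_ns{0};
      std::atomic<uint64_t> slow_path_count{0};
    };

    template <typename MarkFn>
    auto MeasuredMark(MarkFn&& mark, RbCounters& counters, bool measure) -> decltype(mark()) {
      counters.slow_path_count.fetch_add(1, std::memory_order_relaxed);
      const auto start = measure ? std::chrono::steady_clock::now()
                                 : std::chrono::steady_clock::time_point();
      auto ret = mark();  // the real work; everything else is bookkeeping
      if (measure) {
        const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - start).count();
        counters.slow_path_ns.fetch_add(static_cast<uint64_t>(ns), std::memory_order_relaxed);
      }
      return ret;
    }
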
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 5ceab12..5b0e2d6 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -51,23 +51,34 @@
 
 class ConcurrentCopying : public GarbageCollector {
  public:
-  // TODO: disable thse flags for production use.
   // Enable the no-from-space-refs verification at the pause.
-  static constexpr bool kEnableNoFromSpaceRefsVerification = true;
+  static constexpr bool kEnableNoFromSpaceRefsVerification = kIsDebugBuild;
   // Enable the from-space bytes/objects check.
-  static constexpr bool kEnableFromSpaceAccountingCheck = true;
+  static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild;
   // Enable verbose mode.
   static constexpr bool kVerboseMode = false;
+  // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+  // pages.
+  static constexpr bool kGrayDirtyImmuneObjects = true;
 
-  ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+  ConcurrentCopying(Heap* heap,
+                    const std::string& name_prefix = "",
+                    bool measure_read_barrier_slow_path = false);
   ~ConcurrentCopying();
 
-  virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
-  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  virtual void RunPhases() OVERRIDE
+      REQUIRES(!immune_gray_stack_lock_,
+               !mark_stack_lock_,
+               !rb_slow_path_histogram_lock_,
+               !skipped_blocks_lock_);
+  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
   void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
-  void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void FinishPhase() REQUIRES(!mark_stack_lock_,
+                              !rb_slow_path_histogram_lock_,
+                              !skipped_blocks_lock_);
 
   void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::heap_bitmap_lock_);
@@ -93,8 +104,13 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
-  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  template<bool kGrayImmuneObject = true>
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   bool IsMarking() const {
     return is_marking_;
   }
@@ -114,20 +130,23 @@
   void PushOntoMarkStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   mirror::Object* Copy(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!skipped_blocks_lock_, !mark_stack_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void Scan(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  template<bool kGrayImmuneObject>
   void MarkRoot(mirror::CompressedReference<mirror::Object>* root)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
                           const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void VerifyNoFromSpaceReferences() REQUIRES(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
@@ -136,6 +155,12 @@
   bool ProcessMarkStackOnce() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void GrayAllDirtyImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void VerifyGrayImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access)
@@ -147,9 +172,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void ProcessReferences(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
   virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE
@@ -160,12 +187,14 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
   void SweepLargeObjects(bool swap_bitmaps)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
-  void ClearBlackPtrs()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+  void MarkZygoteLargeObjects()
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CheckEmptyMarkStack() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void IssueEmptyCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsOnAllocStack(mirror::Object* ref) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -185,10 +214,30 @@
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegion(mirror::Object* from_ref,
+      accounting::SpaceBitmap<kObjectAlignment>* bitmap)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  template<bool kGrayImmuneObject>
+  ALWAYS_INLINE mirror::Object* MarkImmuneSpace(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!immune_gray_stack_lock_);
+  void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ScanImmuneObject(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
   std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+  std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_;
+  bool rb_mark_bit_stack_full_;
+  std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
       GUARDED_BY(mark_stack_lock_);
@@ -201,8 +250,6 @@
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
   ImmuneSpaces immune_spaces_;
-  std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
-  std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
   accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
@@ -225,6 +272,8 @@
   // How many objects and bytes we moved. Used for accounting.
   Atomic<size_t> bytes_moved_;
   Atomic<size_t> objects_moved_;
+  Atomic<uint64_t> cumulative_bytes_moved_;
+  Atomic<uint64_t> cumulative_objects_moved_;
 
   // The skipped blocks are memory blocks/chunks that were copies of
   // objects that were unused due to lost races (cas failures) at
@@ -234,8 +283,26 @@
   Atomic<size_t> to_space_bytes_skipped_;
   Atomic<size_t> to_space_objects_skipped_;
 
+  // If measure_read_barrier_slow_path_ is true, we measure how much time is spent in
+  // MarkFromReadBarrier and log it.
+  bool measure_read_barrier_slow_path_;
+  // mark_from_read_barrier_measurements_ is true if systrace is enabled or
+  // measure_read_barrier_slow_path_ is true.
+  bool mark_from_read_barrier_measurements_;
+  Atomic<uint64_t> rb_slow_path_ns_;
+  Atomic<uint64_t> rb_slow_path_count_;
+  Atomic<uint64_t> rb_slow_path_count_gc_;
+  mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
   accounting::ReadBarrierTable* rb_table_;
   bool force_evacuate_all_;  // True if all regions are evacuated.
+  Atomic<bool> updated_all_immune_objects_;
+  bool gc_grays_immune_objects_;
+  Mutex immune_gray_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::vector<mirror::Object*> immune_gray_stack_ GUARDED_BY(immune_gray_stack_lock_);
 
   class AssertToSpaceInvariantFieldVisitor;
   class AssertToSpaceInvariantObjectVisitor;
@@ -244,14 +311,17 @@
   class ComputeUnevacFromSpaceLiveRatioVisitor;
   class DisableMarkingCheckpoint;
   class FlipCallback;
-  class ImmuneSpaceObjVisitor;
+  class GrayImmuneObjectVisitor;
+  class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
   class RevokeThreadLocalMarkStackCheckpoint;
+  class ScopedGcGraysImmuneObjects;
+  class ThreadFlipVisitor;
+  class VerifyGrayImmuneObjectsVisitor;
   class VerifyNoFromSpaceRefsFieldVisitor;
   class VerifyNoFromSpaceRefsObjectVisitor;
   class VerifyNoFromSpaceRefsVisitor;
-  class ThreadFlipVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
 };
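
Most of the header churn above adds REQUIRES(!lock) clauses for the new immune_gray_stack_lock_ and rb_slow_path_histogram_lock_. These macros map onto Clang's thread-safety attributes with negative capabilities: declaring that a function must be called without a given lock held lets the analysis flag call chains that could self-deadlock. A standalone sketch of the idiom using the raw attributes; SketchMutex and the function name are illustrative, and the checks only apply when building with Clang's -Wthread-safety / -Wthread-safety-negative:

    #include <mutex>

    #define CAPABILITY(x)   __attribute__((capability(x)))
    #define ACQUIRE(...)    __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)    __attribute__((release_capability(__VA_ARGS__)))
    #define REQUIRES(...)   __attribute__((requires_capability(__VA_ARGS__)))

    class CAPABILITY("mutex") SketchMutex {
     public:
      void Lock() ACQUIRE() { mu_.lock(); }
      void Unlock() RELEASE() { mu_.unlock(); }
     private:
      std::mutex mu_;
    };

    SketchMutex mark_stack_lock;

    // The caller must NOT already hold mark_stack_lock; the analysis warns if this is
    // invoked from a context that still holds it, catching recursive acquisition.
    void PushOntoMarkStackSketch() REQUIRES(!mark_stack_lock) {
      mark_stack_lock.Lock();
      // ... push the reference ...
      mark_stack_lock.Unlock();
    }
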
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 580486a..e0b71a7 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -181,7 +181,7 @@
   void RecordFree(const ObjectBytePair& freed);
   // Record a free of large objects.
   void RecordFreeLOS(const ObjectBytePair& freed);
-  void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+  virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
 
   // Helper functions for querying if objects are marked. These are used for processing references,
   // and will be used for reading system weaks while the GC is running.
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 24cbf10..3904160 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "base/bounded_fifo.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex-inl.h"
@@ -430,13 +431,12 @@
                             << " first_ref_field_offset="
                             << (holder_->IsClass()
                                 ? holder_->AsClass()->GetFirstReferenceStaticFieldOffset(
-                                    sizeof(void*))
+                                    kRuntimePointerSize)
                                 : holder_->GetClass()->GetFirstReferenceInstanceFieldOffset())
                             << " num_of_ref_fields="
                             << (holder_->IsClass()
                                 ? holder_->AsClass()->NumReferenceStaticFields()
-                                : holder_->GetClass()->NumReferenceInstanceFields())
-                            << "\n";
+                                : holder_->GetClass()->NumReferenceInstanceFields());
         // Print the memory content of the holder.
         for (size_t i = 0; i < holder_size / sizeof(uint32_t); ++i) {
           uint32_t* p = reinterpret_cast<uint32_t*>(holder_);
@@ -446,27 +446,8 @@
       }
       PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
       MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
-      {
-        LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root";
-        Thread* self = Thread::Current();
-        if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-          mark_sweep_->VerifyRoots();
-        } else {
-          const bool heap_bitmap_exclusive_locked =
-              Locks::heap_bitmap_lock_->IsExclusiveHeld(self);
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
-          }
-          {
-            ScopedThreadSuspension(self, kSuspended);
-            ScopedSuspendAll ssa(__FUNCTION__);
-            mark_sweep_->VerifyRoots();
-          }
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveLock(self);
-          }
-        }
-      }
+      LOG(INTERNAL_FATAL) << "Attempting to see if it's a bad thread root";
+      mark_sweep_->VerifySuspendedThreadRoots();
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
@@ -599,9 +580,9 @@
   }
 };
 
-void MarkSweep::VerifyRoots() {
+void MarkSweep::VerifySuspendedThreadRoots() {
   VerifyRootVisitor visitor;
-  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
+  Runtime::Current()->GetThreadList()->VisitRootsForSuspendedThreads(&visitor);
 }
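
The bad-root fallback no longer juggles the heap-bitmap and mutator locks in order to suspend everything; VerifySuspendedThreadRoots walks roots only for threads that are already suspended. A hedged sketch of that filtering, with an illustrative thread list in place of ART's ThreadList:

    #include <functional>
    #include <vector>

    struct ThreadSketch {
      bool suspended = false;
      std::vector<void*> roots;
    };

    // Visit roots only for threads that are already suspended, so a possibly lock-holding
    // caller never has to suspend anything or re-acquire locks.
    void VisitRootsForSuspendedThreadsSketch(const std::vector<ThreadSketch*>& threads,
                                             const std::function<void(void*)>& visitor) {
      for (ThreadSketch* thread : threads) {
        if (!thread->suspended) {
          continue;  // skip running threads; their stacks may still be changing
        }
        for (void* root : thread->roots) {
          visitor(root);
        }
      }
    }
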
 
 void MarkSweep::MarkRoots(Thread* self) {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 92d7cf6..9747031 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -250,8 +250,8 @@
 
   // Verify the roots of the heap and print out information related to any invalid roots.
   // Called in MarkObject, so we may not hold the mutator lock.
-  void VerifyRoots()
-      NO_THREAD_SAFETY_ANALYSIS;
+  void VerifySuspendedThreadRoots()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
   void ExpandMarkStack()
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index f14d086..7899a7c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -44,6 +44,8 @@
   kCollectorTypeInstrumentation,
   // Fake collector for adding or removing application image spaces.
   kCollectorTypeAddRemoveAppImageSpace,
+  // Fake collector used to implement exclusion between GC and debugger.
+  kCollectorTypeDebugger,
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index cf765f5..1d377a4 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -35,6 +35,7 @@
     case kGcCauseTrim: return "HeapTrim";
     case kGcCauseInstrumentation: return "Instrumentation";
     case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
+    case kGcCauseDebugger: return "Debugger";
     case kGcCauseClassLinker: return "ClassLinker";
     case kGcCauseJitCodeCache: return "JitCodeCache";
     default:
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 7d2cc4f..4348a41 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -43,6 +43,8 @@
   kGcCauseInstrumentation,
   // Not a real GC cause, used to add or remove app image spaces.
   kGcCauseAddRemoveAppImageSpace,
+  // Not a real GC cause, used to implement exclusion between GC and debugger.
+  kGcCauseDebugger,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
   // Class linker cause, used to guard filling art methods with special values.
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 6aed61a..b0ca18e 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -49,6 +49,7 @@
     // done in the runnable state where suspension is expected.
     CHECK_EQ(self->GetState(), kRunnable);
     self->AssertThreadSuspensionIsAllowable();
+    self->AssertNoPendingException();
   }
   // Need to check that we aren't the large object allocator since the large object allocation
   // code path calls this function. If we didn't check, we would have an infinite loop.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 03b7713..638c1d8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -121,6 +121,10 @@
 
 static constexpr size_t kNativeAllocationHistogramBuckets = 16;
 
+// Extra amount added to the heap growth multiplier. Used to adjust the GC ergonomics for the
+// read barrier config.
+static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
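
kExtraHeapGrowthMultiplier is folded into foreground_heap_growth_multiplier_ a few hunks below, so with the kDefaultHeapGrowthMultiplier of 2.0 visible in heap.h a read-barrier build grows the foreground heap by a factor of 3.0 after a GC. A tiny illustration with constants local to this sketch:

    // Assumes the defaults visible in this patch; names are local to this sketch.
    constexpr bool kUseReadBarrierSketch = true;       // a CC / read-barrier build
    constexpr double kDefaultGrowthMultiplier = 2.0;   // kDefaultHeapGrowthMultiplier in heap.h
    constexpr double kExtra = kUseReadBarrierSketch ? 1.0 : 0.0;
    constexpr double kForegroundMultiplier = kDefaultGrowthMultiplier + kExtra;
    static_assert(kForegroundMultiplier == 3.0,
                  "the read-barrier config grows the heap more aggressively");
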
@@ -153,6 +157,7 @@
            bool verify_pre_sweeping_rosalloc,
            bool verify_post_gc_rosalloc,
            bool gc_stress_mode,
+           bool measure_gc_performance,
            bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
@@ -220,10 +225,14 @@
       min_free_(min_free),
       max_free_(max_free),
       target_utilization_(target_utilization),
-      foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier),
+      foreground_heap_growth_multiplier_(
+          foreground_heap_growth_multiplier + kExtraHeapGrowthMultiplier),
       total_wait_time_(0),
       verify_object_mode_(kVerifyObjectModeDisabled),
       disable_moving_gc_count_(0),
+      semi_space_collector_(nullptr),
+      mark_compact_collector_(nullptr),
+      concurrent_copying_collector_(nullptr),
       is_running_on_memory_tool_(Runtime::Current()->IsRunningOnMemoryTool()),
       use_tlab_(use_tlab),
       main_space_backup_(nullptr),
@@ -251,6 +260,7 @@
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
@@ -287,12 +297,13 @@
     for (size_t index = 0; index < image_file_names.size(); ++index) {
       std::string& image_name = image_file_names[index];
       std::string error_msg;
-      space::ImageSpace* boot_image_space = space::ImageSpace::CreateBootImage(
+      std::unique_ptr<space::ImageSpace> boot_image_space_uptr = space::ImageSpace::CreateBootImage(
           image_name.c_str(),
           image_instruction_set,
           index > 0,
           &error_msg);
-      if (boot_image_space != nullptr) {
+      if (boot_image_space_uptr != nullptr) {
+        space::ImageSpace* boot_image_space = boot_image_space_uptr.release();
         AddSpace(boot_image_space);
         added_image_spaces.push_back(boot_image_space);
         // Oat files referenced by image files immediately follow them in memory, ensure alloc space
@@ -442,7 +453,7 @@
   }
   // Create other spaces based on whether or not we have a moving GC.
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    region_space_ = space::RegionSpace::Create("Region space", capacity_ * 2, request_begin);
+    region_space_ = space::RegionSpace::Create(
+        "main space (region space)", capacity_ * 2, request_begin);
     AddSpace(region_space_);
   } else if (IsMovingGc(foreground_collector_type_) &&
       foreground_collector_type_ != kCollectorTypeGSS) {
@@ -594,7 +605,9 @@
       garbage_collectors_.push_back(semi_space_collector_);
     }
     if (MayUseCollector(kCollectorTypeCC)) {
-      concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+      concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
+                                                                       "",
+                                                                       measure_gc_performance);
       garbage_collectors_.push_back(concurrent_copying_collector_);
     }
     if (MayUseCollector(kCollectorTypeMC)) {
@@ -865,9 +878,13 @@
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
-  while (thread_flip_running_) {
-    has_waited = true;
-    thread_flip_cond_->Wait(self);
+  if (thread_flip_running_) {
+    TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
+                                     GetCurrentGcIteration()->GetTimings());
+    while (thread_flip_running_) {
+      has_waited = true;
+      thread_flip_cond_->Wait(self);
+    }
   }
   ++disable_thread_flip_count_;
   if (has_waited) {
@@ -2530,14 +2547,38 @@
   AddSpace(zygote_space_);
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   AddSpace(non_moving_space_);
+  if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+    // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is
+    // safe since we mark all of the objects that may reference non-immune objects as gray.
+    zygote_space_->GetLiveBitmap()->VisitMarkedRange(
+        reinterpret_cast<uintptr_t>(zygote_space_->Begin()),
+        reinterpret_cast<uintptr_t>(zygote_space_->Limit()),
+        [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+      CHECK(obj->AtomicSetMarkBit(0, 1));
+    });
+  }
+
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableCardCache("zygote space mod-union table", this,
-                                             zygote_space_);
+      new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space_);
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
-  // Set all the cards in the mod-union table since we don't know which objects contain references
-  // to large objects.
-  mod_union_table->SetCards();
+
+  if (collector_type_ != kCollectorTypeCC) {
+    // Set all the cards in the mod-union table since we don't know which objects contain references
+    // to large objects.
+    mod_union_table->SetCards();
+  } else {
+    // For CC we never collect zygote large objects. This means we do not need to set the cards for
+    // the zygote mod-union table and we can also clear all of the existing image mod-union tables.
+    // The existing mod-union tables are only for image spaces and may only reference zygote and
+    // image objects.
+    for (auto& pair : mod_union_tables_) {
+      CHECK(pair.first->IsImageSpace());
+      CHECK(!pair.first->AsImageSpace()->GetImageHeader().IsAppImage());
+      accounting::ModUnionTable* table = pair.second;
+      table->ClearTable();
+    }
+  }
   AddModUnionTable(mod_union_table);
   large_object_space_->SetAllLargeObjectsAsZygoteObjects(self);
   if (collector::SemiSpace::kUseRememberedSet) {
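
For the CC collector, zygote space creation now pre-marks every live zygote object so concurrent marking never needs to dirty those shared pages, and the image mod-union tables are cleared rather than having every card set. A reduced sketch of the pre-marking walk; LiveBitmapSketch and ZygoteObj are illustrative stand-ins for the space bitmap and mirror::Object:

    #include <atomic>
    #include <cstdint>
    #include <functional>
    #include <vector>

    struct ZygoteObj {
      std::atomic<uint32_t> mark_bit{0};
      bool AtomicSetMarkBit(uint32_t expected, uint32_t desired) {
        return mark_bit.compare_exchange_strong(expected, desired);
      }
    };

    struct LiveBitmapSketch {
      std::vector<ZygoteObj*> live_objects;
      // Stand-in for SpaceBitmap::VisitMarkedRange over the zygote space.
      void VisitMarkedRange(const std::function<void(ZygoteObj*)>& visitor) const {
        for (ZygoteObj* obj : live_objects) {
          visitor(obj);
        }
      }
    };

    // Pre-mark every live zygote object so the concurrent collector never writes to it.
    void PreMarkZygoteObjects(const LiveBitmapSketch& live_bitmap) {
      live_bitmap.VisitMarkedRange([](ZygoteObj* obj) {
        obj->AtomicSetMarkBit(0u, 1u);
      });
    }
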
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 2a1a4a1..be8ed40 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -128,11 +128,12 @@
   static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4;
   static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
   static constexpr size_t kDefaultLongGCLogThreshold = MsToNs(100);
-  static constexpr size_t kDefaultTLABSize = 256 * KB;
+  static constexpr size_t kDefaultTLABSize = 32 * KB;
   static constexpr double kDefaultTargetUtilization = 0.5;
   static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
   // Primitive arrays larger than this size are put in the large object space.
-  static constexpr size_t kDefaultLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kMinLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kDefaultLargeObjectThreshold = kMinLargeObjectThreshold;
   // Whether or not parallel GC is enabled. If not, then we never create the thread pool.
   static constexpr bool kDefaultEnableParallelGC = false;
 
@@ -182,6 +183,7 @@
        bool verify_pre_sweeping_rosalloc,
        bool verify_post_gc_rosalloc,
        bool gc_stress_mode,
+       bool measure_gc_performance,
        bool use_homogeneous_space_compaction,
        uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 03ab9a1..62625c4 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -44,7 +44,9 @@
     // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
-    mirror::Reference* head = list_->GetPendingNext();
+    // The list is owned by the GC; everything that has been inserted must already be at least
+    // gray.
+    mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>();
     DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
@@ -54,45 +56,33 @@
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* ref = list_->GetPendingNext();
+  mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>();
   DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
   if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = ref->GetPendingNext();
+    mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>();
     list_->SetPendingNext(next);
   }
   ref->SetPendingNext(nullptr);
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
-    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to black or white.
+    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to white.
     // We check IsActive() above because we don't want to do this when the zygote compaction
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    const bool is_moving = concurrent_copying->RegionSpace()->IsInToSpace(ref);
-    if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      if (is_moving) {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
-      } else {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
-      }
+    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
+    if (rb_ptr == ReadBarrier::GrayPtr()) {
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
-      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a black or white Reference in the
-      // queue and find it here, which is OK. Check that the color makes sense depending on whether
-      // the Reference is moving or not and that the referent has been marked.
-      if (is_moving) {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      } else {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      }
+      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
+      // find it here, which is OK.
+      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
       mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
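
With black pointers gone, DequeuePendingReference only has to distinguish gray from white: a reference left gray by ProcessMarkStackRef() is CAS'd back to white, and anything else is expected to already be white. A small sketch of that fix-up with illustrative types:

    #include <atomic>
    #include <cassert>

    enum class RbColorSketch { kWhite, kGray };

    struct RefSketch {
      std::atomic<RbColorSketch> rb_ptr{RbColorSketch::kGray};
      bool AtomicSetReadBarrierPointer(RbColorSketch expected, RbColorSketch desired) {
        return rb_ptr.compare_exchange_strong(expected, desired);
      }
    };

    // Called while dequeuing: make sure the reference leaves the queue white.
    void WhitenDequeuedReference(RefSketch* ref) {
      if (ref->rb_ptr.load() == RbColorSketch::kGray) {
        ref->AtomicSetReadBarrierPointer(RbColorSketch::kGray, RbColorSketch::kWhite);
        assert(ref->rb_ptr.load() == RbColorSketch::kWhite);
      } else {
        // A white reference may also be left in the queue, which is fine.
        assert(ref->rb_ptr.load() == RbColorSketch::kWhite);
      }
    }
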
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index b7b9ffe..ae6c321 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -23,6 +23,7 @@
 #include <unistd.h>
 
 #include "art_method.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
 #include "base/scoped_flock.h"
@@ -78,7 +79,12 @@
   return r;
 }
 
-static bool GenerateImage(const std::string& image_filename, InstructionSet image_isa,
+static int32_t ChooseRelocationOffsetDelta() {
+  return ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA, ART_BASE_ADDRESS_MAX_DELTA);
+}
+
+static bool GenerateImage(const std::string& image_filename,
+                          InstructionSet image_isa,
                           std::string* error_msg) {
   const std::string boot_class_path_string(Runtime::Current()->GetBootClassPathString());
   std::vector<std::string> boot_class_path;
@@ -117,8 +123,7 @@
   CHECK_EQ(image_isa, kRuntimeISA)
       << "We should always be generating an image for the current isa.";
 
-  int32_t base_offset = ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA,
-                                                    ART_BASE_ADDRESS_MAX_DELTA);
+  int32_t base_offset = ChooseRelocationOffsetDelta();
   LOG(INFO) << "Using an offset of 0x" << std::hex << base_offset << " from default "
             << "art base address of 0x" << std::hex << ART_BASE_ADDRESS;
   arg_vector.push_back(StringPrintf("--base=0x%x", ART_BASE_ADDRESS + base_offset));
@@ -137,14 +142,17 @@
   return Exec(arg_vector, error_msg);
 }
 
-bool ImageSpace::FindImageFilename(const char* image_location,
-                                   const InstructionSet image_isa,
-                                   std::string* system_filename,
-                                   bool* has_system,
-                                   std::string* cache_filename,
-                                   bool* dalvik_cache_exists,
-                                   bool* has_cache,
-                                   bool* is_global_cache) {
+static bool FindImageFilenameImpl(const char* image_location,
+                                  const InstructionSet image_isa,
+                                  bool* has_system,
+                                  std::string* system_filename,
+                                  bool* dalvik_cache_exists,
+                                  std::string* dalvik_cache,
+                                  bool* is_global_cache,
+                                  bool* has_cache,
+                                  std::string* cache_filename) {
+  DCHECK(dalvik_cache != nullptr);
+
   *has_system = false;
   *has_cache = false;
   // image_location = /system/framework/boot.art
@@ -157,9 +165,12 @@
 
   bool have_android_data = false;
   *dalvik_cache_exists = false;
-  std::string dalvik_cache;
-  GetDalvikCache(GetInstructionSetString(image_isa), true, &dalvik_cache,
-                 &have_android_data, dalvik_cache_exists, is_global_cache);
+  GetDalvikCache(GetInstructionSetString(image_isa),
+                 true,
+                 dalvik_cache,
+                 &have_android_data,
+                 dalvik_cache_exists,
+                 is_global_cache);
 
   if (have_android_data && *dalvik_cache_exists) {
     // Always set output location even if it does not exist,
@@ -168,7 +179,10 @@
     // image_location = /system/framework/boot.art
     // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
     std::string error_msg;
-    if (!GetDalvikCacheFilename(image_location, dalvik_cache.c_str(), cache_filename, &error_msg)) {
+    if (!GetDalvikCacheFilename(image_location,
+                                dalvik_cache->c_str(),
+                                cache_filename,
+                                &error_msg)) {
       LOG(WARNING) << error_msg;
       return *has_system;
     }
@@ -177,6 +191,26 @@
   return *has_system || *has_cache;
 }
 
+bool ImageSpace::FindImageFilename(const char* image_location,
+                                   const InstructionSet image_isa,
+                                   std::string* system_filename,
+                                   bool* has_system,
+                                   std::string* cache_filename,
+                                   bool* dalvik_cache_exists,
+                                   bool* has_cache,
+                                   bool* is_global_cache) {
+  std::string dalvik_cache_unused;
+  return FindImageFilenameImpl(image_location,
+                               image_isa,
+                               has_system,
+                               system_filename,
+                               dalvik_cache_exists,
+                               &dalvik_cache_unused,
+                               is_global_cache,
+                               has_cache,
+                               cache_filename);
+}
+
 static bool ReadSpecificImageHeader(const char* filename, ImageHeader* image_header) {
     std::unique_ptr<File> image_file(OS::OpenFileForReading(filename));
     if (image_file.get() == nullptr) {
@@ -190,8 +224,10 @@
 }
 
 // Relocate the image at image_location to dest_filename and relocate it by a random amount.
-static bool RelocateImage(const char* image_location, const char* dest_filename,
-                               InstructionSet isa, std::string* error_msg) {
+static bool RelocateImage(const char* image_location,
+                          const char* dest_filename,
+                          InstructionSet isa,
+                          std::string* error_msg) {
   // We should clean up so we are more likely to have room for the image.
   if (Runtime::Current()->IsZygote()) {
     LOG(INFO) << "Pruning dalvik-cache since we are relocating an image and will need to recompile";
@@ -210,8 +246,7 @@
   instruction_set_arg += GetInstructionSetString(isa);
 
   std::string base_offset_arg("--base-offset-delta=");
-  StringAppendF(&base_offset_arg, "%d", ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA,
-                                                                    ART_BASE_ADDRESS_MAX_DELTA));
+  StringAppendF(&base_offset_arg, "%d", ChooseRelocationOffsetDelta());
 
   std::vector<std::string> argv;
   argv.push_back(patchoat);
@@ -236,16 +271,6 @@
   return hdr.release();
 }
 
-ImageHeader* ImageSpace::ReadImageHeaderOrDie(const char* image_location,
-                                              const InstructionSet image_isa) {
-  std::string error_msg;
-  ImageHeader* image_header = ReadImageHeader(image_location, image_isa, &error_msg);
-  if (image_header == nullptr) {
-    LOG(FATAL) << error_msg;
-  }
-  return image_header;
-}
-
 ImageHeader* ImageSpace::ReadImageHeader(const char* image_location,
                                          const InstructionSet image_isa,
                                          std::string* error_msg) {
@@ -311,11 +336,31 @@
   return nullptr;
 }
 
-static bool ChecksumsMatch(const char* image_a, const char* image_b) {
+static bool ChecksumsMatch(const char* image_a, const char* image_b, std::string* error_msg) {
+  DCHECK(error_msg != nullptr);
+
   ImageHeader hdr_a;
   ImageHeader hdr_b;
-  return ReadSpecificImageHeader(image_a, &hdr_a) && ReadSpecificImageHeader(image_b, &hdr_b)
-      && hdr_a.GetOatChecksum() == hdr_b.GetOatChecksum();
+
+  if (!ReadSpecificImageHeader(image_a, &hdr_a)) {
+    *error_msg = StringPrintf("Cannot read header of %s", image_a);
+    return false;
+  }
+  if (!ReadSpecificImageHeader(image_b, &hdr_b)) {
+    *error_msg = StringPrintf("Cannot read header of %s", image_b);
+    return false;
+  }
+
+  if (hdr_a.GetOatChecksum() != hdr_b.GetOatChecksum()) {
+    *error_msg = StringPrintf("Checksum mismatch: %u(%s) vs %u(%s)",
+                              hdr_a.GetOatChecksum(),
+                              image_a,
+                              hdr_b.GetOatChecksum(),
+                              image_b);
+    return false;
+  }
+
+  return true;
 }
 
 static bool ImageCreationAllowed(bool is_global_cache, std::string* error_msg) {
@@ -333,256 +378,6 @@
   return false;
 }
 
-static constexpr uint64_t kLowSpaceValue = 50 * MB;
-static constexpr uint64_t kTmpFsSentinelValue = 384 * MB;
-
-// Read the free space of the cache partition and make a decision whether to keep the generated
-// image. This is to try to mitigate situations where the system might run out of space later.
-static bool CheckSpace(const std::string& cache_filename, std::string* error_msg) {
-  // Using statvfs vs statvfs64 because of b/18207376, and it is enough for all practical purposes.
-  struct statvfs buf;
-
-  int res = TEMP_FAILURE_RETRY(statvfs(cache_filename.c_str(), &buf));
-  if (res != 0) {
-    // Could not stat. Conservatively tell the system to delete the image.
-    *error_msg = "Could not stat the filesystem, assuming low-memory situation.";
-    return false;
-  }
-
-  uint64_t fs_overall_size = buf.f_bsize * static_cast<uint64_t>(buf.f_blocks);
-  // Zygote is privileged, but other things are not. Use bavail.
-  uint64_t fs_free_size = buf.f_bsize * static_cast<uint64_t>(buf.f_bavail);
-
-  // Take the overall size as an indicator for a tmpfs, which is being used for the decryption
-  // environment. We do not want to fail quickening the boot image there, as it is beneficial
-  // for time-to-UI.
-  if (fs_overall_size > kTmpFsSentinelValue) {
-    if (fs_free_size < kLowSpaceValue) {
-      *error_msg = StringPrintf("Low-memory situation: only %4.2f megabytes available after image"
-                                " generation, need at least %" PRIu64 ".",
-                                static_cast<double>(fs_free_size) / MB,
-                                kLowSpaceValue / MB);
-      return false;
-    }
-  }
-  return true;
-}
-
-ImageSpace* ImageSpace::CreateBootImage(const char* image_location,
-                                        const InstructionSet image_isa,
-                                        bool secondary_image,
-                                        std::string* error_msg) {
-  ScopedTrace trace(__FUNCTION__);
-  std::string system_filename;
-  bool has_system = false;
-  std::string cache_filename;
-  bool has_cache = false;
-  bool dalvik_cache_exists = false;
-  bool is_global_cache = true;
-  bool found_image = FindImageFilename(image_location, image_isa, &system_filename,
-                                       &has_system, &cache_filename, &dalvik_cache_exists,
-                                       &has_cache, &is_global_cache);
-
-  const bool is_zygote = Runtime::Current()->IsZygote();
-  if (is_zygote && !secondary_image) {
-    MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
-  }
-
-  ImageSpace* space;
-  bool relocate = Runtime::Current()->ShouldRelocate();
-  bool can_compile = Runtime::Current()->IsImageDex2OatEnabled();
-  if (found_image) {
-    const std::string* image_filename;
-    bool is_system = false;
-    bool relocated_version_used = false;
-    if (relocate) {
-      if (!dalvik_cache_exists) {
-        *error_msg = StringPrintf("Requiring relocation for image '%s' at '%s' but we do not have "
-                                  "any dalvik_cache to find/place it in.",
-                                  image_location, system_filename.c_str());
-        return nullptr;
-      }
-      if (has_system) {
-        if (has_cache && ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
-          // We already have a relocated version
-          image_filename = &cache_filename;
-          relocated_version_used = true;
-        } else {
-          // We cannot have a relocated version, Relocate the system one and use it.
-
-          std::string reason;
-          bool success;
-
-          // Check whether we are allowed to relocate.
-          if (!can_compile) {
-            reason = "Image dex2oat disabled by -Xnoimage-dex2oat.";
-            success = false;
-          } else if (!ImageCreationAllowed(is_global_cache, &reason)) {
-            // Whether we can write to the cache.
-            success = false;
-          } else if (secondary_image) {
-            if (is_zygote) {
-              // Secondary image is out of date. Clear cache and exit to let it retry from scratch.
-              LOG(ERROR) << "Cannot patch secondary image '" << image_location
-                         << "', clearing dalvik_cache and restarting zygote.";
-              PruneDalvikCache(image_isa);
-              _exit(1);
-            } else {
-              reason = "Should not have to patch secondary image.";
-              success = false;
-            }
-          } else {
-            // Try to relocate.
-            success = RelocateImage(image_location, cache_filename.c_str(), image_isa, &reason);
-          }
-
-          if (success) {
-            relocated_version_used = true;
-            image_filename = &cache_filename;
-          } else {
-            *error_msg = StringPrintf("Unable to relocate image '%s' from '%s' to '%s': %s",
-                                      image_location, system_filename.c_str(),
-                                      cache_filename.c_str(), reason.c_str());
-            // We failed to create files, remove any possibly garbage output.
-            // Since ImageCreationAllowed was true above, we are the zygote
-            // and therefore the only process expected to generate these for
-            // the device.
-            PruneDalvikCache(image_isa);
-            return nullptr;
-          }
-        }
-      } else {
-        CHECK(has_cache);
-        // We can just use cache's since it should be fine. This might or might not be relocated.
-        image_filename = &cache_filename;
-      }
-    } else {
-      if (has_system && has_cache) {
-        // Check they have the same cksum. If they do use the cache. Otherwise system.
-        if (ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
-          image_filename = &cache_filename;
-          relocated_version_used = true;
-        } else {
-          image_filename = &system_filename;
-          is_system = true;
-        }
-      } else if (has_system) {
-        image_filename = &system_filename;
-        is_system = true;
-      } else {
-        CHECK(has_cache);
-        image_filename = &cache_filename;
-      }
-    }
-    {
-      // Note that we must not use the file descriptor associated with
-      // ScopedFlock::GetFile to Init the image file. We want the file
-      // descriptor (and the associated exclusive lock) to be released when
-      // we leave Create.
-      ScopedFlock image_lock;
-      // Should this be a RDWR lock? This is only a defensive measure, as at
-      // this point the image should exist.
-      // However, only the zygote can write into the global dalvik-cache, so
-      // restrict to zygote processes, or any process that isn't using
-      // /data/dalvik-cache (which we assume to be allowed to write there).
-      const bool rw_lock = is_zygote || !is_global_cache;
-      image_lock.Init(image_filename->c_str(),
-                      rw_lock ? (O_CREAT | O_RDWR) : O_RDONLY /* flags */,
-                      true /* block */,
-                      error_msg);
-      VLOG(startup) << "Using image file " << image_filename->c_str() << " for image location "
-                    << image_location;
-      // If we are in /system we can assume the image is good. We can also
-      // assume this if we are using a relocated image (i.e. image checksum
-      // matches) since this is only different by the offset. We need this to
-      // make sure that host tests continue to work.
-      // Since we are the boot image, pass null since we load the oat file from the boot image oat
-      // file name.
-      space = ImageSpace::Init(image_filename->c_str(),
-                               image_location,
-                               !(is_system || relocated_version_used),
-                               /* oat_file */nullptr,
-                               error_msg);
-    }
-    if (space != nullptr) {
-      // Check whether there is enough space left over in the data partition. Even if we can load
-      // the image, we need to be conservative, as some parts of the platform are not very tolerant
-      // of space constraints.
-      // ImageSpace doesn't know about the data partition per se, it relies on the FindImageFilename
-      // helper (which relies on GetDalvikCache). So for now, if we load an image out of /system,
-      // ignore the check (as it would test for free space in /system instead).
-      if (!is_system && !CheckSpace(*image_filename, error_msg)) {
-        // No. Delete the generated image and try to run out of the dex files.
-        PruneDalvikCache(image_isa);
-        return nullptr;
-      }
-      return space;
-    }
-
-    if (relocated_version_used) {
-      // Something is wrong with the relocated copy (even though checksums match). Cleanup.
-      // This can happen if the .oat is corrupt, since the above only checks the .art checksums.
-      // TODO: Check the oat file validity earlier.
-      *error_msg = StringPrintf("Attempted to use relocated version of %s at %s generated from %s "
-                                "but image failed to load: %s",
-                                image_location, cache_filename.c_str(), system_filename.c_str(),
-                                error_msg->c_str());
-      PruneDalvikCache(image_isa);
-      return nullptr;
-    } else if (is_system) {
-      // If the /system file exists, it should be up-to-date, don't try to generate it.
-      *error_msg = StringPrintf("Failed to load /system image '%s': %s",
-                                image_filename->c_str(), error_msg->c_str());
-      return nullptr;
-    } else {
-      // Otherwise, log a warning and fall through to GenerateImage.
-      LOG(WARNING) << *error_msg;
-    }
-  }
-
-  if (!can_compile) {
-    *error_msg = "Not attempting to compile image because -Xnoimage-dex2oat";
-    return nullptr;
-  } else if (!dalvik_cache_exists) {
-    *error_msg = StringPrintf("No place to put generated image.");
-    return nullptr;
-  } else if (!ImageCreationAllowed(is_global_cache, error_msg)) {
-    return nullptr;
-  } else if (secondary_image) {
-    *error_msg = "Cannot compile a secondary image.";
-    return nullptr;
-  } else if (!GenerateImage(cache_filename, image_isa, error_msg)) {
-    *error_msg = StringPrintf("Failed to generate image '%s': %s",
-                              cache_filename.c_str(), error_msg->c_str());
-    // We failed to create files, remove any possibly garbage output.
-    // Since ImageCreationAllowed was true above, we are the zygote
-    // and therefore the only process expected to generate these for
-    // the device.
-    PruneDalvikCache(image_isa);
-    return nullptr;
-  } else {
-    // Check whether there is enough space left over after we have generated the image.
-    if (!CheckSpace(cache_filename, error_msg)) {
-      // No. Delete the generated image and try to run out of the dex files.
-      PruneDalvikCache(image_isa);
-      return nullptr;
-    }
-
-    // Note that we must not use the file descriptor associated with
-    // ScopedFlock::GetFile to Init the image file. We want the file
-    // descriptor (and the associated exclusive lock) to be released when
-    // we leave Create.
-    ScopedFlock image_lock;
-    image_lock.Init(cache_filename.c_str(), error_msg);
-    space = ImageSpace::Init(cache_filename.c_str(), image_location, true, nullptr, error_msg);
-    if (space == nullptr) {
-      *error_msg = StringPrintf("Failed to load generated image '%s': %s",
-                                cache_filename.c_str(), error_msg->c_str());
-    }
-    return space;
-  }
-}
-
 void ImageSpace::VerifyImageAllocations() {
   uint8_t* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
   while (current < End()) {
@@ -651,846 +446,1168 @@
             << reinterpret_cast<const void*>(reloc.Dest() + reloc.Length()) << ")";
 }
 
-class FixupVisitor : public ValueObject {
+// Helper class encapsulating loading, so we can access private ImageSpace members (this is a
+// friend class) without having to declare these functions in the header.
+class ImageSpaceLoader {
  public:
-  FixupVisitor(const RelocationRange& boot_image,
-               const RelocationRange& boot_oat,
-               const RelocationRange& app_image,
-               const RelocationRange& app_oat)
-      : boot_image_(boot_image),
-        boot_oat_(boot_oat),
-        app_image_(app_image),
-        app_oat_(app_oat) {}
-
-  // Return the relocated address of a heap object.
-  template <typename T>
-  ALWAYS_INLINE T* ForwardObject(T* src) const {
-    const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_image_.InSource(uint_src)) {
-      return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
-    }
-    if (app_image_.InSource(uint_src)) {
-      return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
-    }
-    // Since we are fixing up the app image, there should only be pointers to the app image and
-    // boot image.
-    DCHECK(src == nullptr) << reinterpret_cast<const void*>(src);
-    return src;
-  }
-
-  // Return the relocated address of a code pointer (contained by an oat file).
-  ALWAYS_INLINE const void* ForwardCode(const void* src) const {
-    const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
-    if (boot_oat_.InSource(uint_src)) {
-      return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
-    }
-    if (app_oat_.InSource(uint_src)) {
-      return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
-    }
-    DCHECK(src == nullptr) << src;
-    return src;
-  }
-
-  // Must be called on pointers that already have been relocated to the destination relocation.
-  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
-    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
-  }
-
- protected:
-  // Source section.
-  const RelocationRange boot_image_;
-  const RelocationRange boot_oat_;
-  const RelocationRange app_image_;
-  const RelocationRange app_oat_;
-};
-
-// Adapt for mirror::Class::FixupNativePointers.
-class FixupObjectAdapter : public FixupVisitor {
- public:
-  template<typename... Args>
-  explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
-
-  template <typename T>
-  T* operator()(T* obj) const {
-    return ForwardObject(obj);
-  }
-};
-
-class FixupRootVisitor : public FixupVisitor {
- public:
-  template<typename... Args>
-  explicit FixupRootVisitor(Args... args) : FixupVisitor(args...) {}
-
-  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+  static std::unique_ptr<ImageSpace> Load(const char* image_location,
+                                          const std::string& image_filename,
+                                          bool is_zygote,
+                                          bool is_global_cache,
+                                          bool is_system,
+                                          bool relocated_version_used,
+                                          std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!root->IsNull()) {
-      VisitRoot(root);
-    }
+    // Note that we must not use the file descriptor associated with
+    // ScopedFlock::GetFile to Init the image file. We want the file
+    // descriptor (and the associated exclusive lock) to be released when
+    // we leave Create.
+    ScopedFlock image_lock;
+    // Should this be a RDWR lock? This is only a defensive measure, as at
+    // this point the image should exist.
+    // However, only the zygote can write into the global dalvik-cache, so
+    // restrict to zygote processes, or any process that isn't using
+    // /data/dalvik-cache (which we assume to be allowed to write there).
+    const bool rw_lock = is_zygote || !is_global_cache;
+    image_lock.Init(image_filename.c_str(),
+                    rw_lock ? (O_CREAT | O_RDWR) : O_RDONLY /* flags */,
+                    true /* block */,
+                    error_msg);
+    VLOG(startup) << "Using image file " << image_filename.c_str() << " for image location "
+                  << image_location;
+    // If we are in /system we can assume the image is good. We can also
+    // assume this if we are using a relocated image (i.e. image checksum
+    // matches) since this is only different by the offset. We need this to
+    // make sure that host tests continue to work.
+    // Since this is the boot image, pass a null oat file; it will be loaded from the boot image
+    // oat file name.
+    return Init(image_filename.c_str(),
+                image_location,
+                !(is_system || relocated_version_used),
+                /* oat_file */nullptr,
+                error_msg);
   }
 
-  ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+  static std::unique_ptr<ImageSpace> Init(const char* image_filename,
+                                          const char* image_location,
+                                          bool validate_oat_file,
+                                          const OatFile* oat_file,
+                                          std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::Object* ref = root->AsMirrorPtr();
-    mirror::Object* new_ref = ForwardObject(ref);
-    if (ref != new_ref) {
-      root->Assign(new_ref);
-    }
-  }
-};
+    CHECK(image_filename != nullptr);
+    CHECK(image_location != nullptr);
 
-class FixupObjectVisitor : public FixupVisitor {
- public:
-  template<typename... Args>
-  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited,
-                              const size_t pointer_size,
-                              Args... args)
-      : FixupVisitor(args...),
-        pointer_size_(pointer_size),
-        visited_(visited) {}
+    TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
+    VLOG(image) << "ImageSpace::Init entering image_filename=" << image_filename;
 
-  // Fix up separately since we also need to fix up method entrypoints.
-  ALWAYS_INLINE void VisitRootIfNonNull(
-      mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
-
-  ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
-      const {}
-
-  ALWAYS_INLINE void operator()(mirror::Object* obj,
-                                MemberOffset offset,
-                                bool is_static ATTRIBUTE_UNUSED) const
-      NO_THREAD_SAFETY_ANALYSIS {
-    // There could be overlap between ranges, we must avoid visiting the same reference twice.
-    // Avoid the class field since we already fixed it up in FixupClassVisitor.
-    if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
-      // Space is not yet added to the heap, don't do a read barrier.
-      mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
-          offset);
-      // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
-      // image.
-      obj->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(offset, ForwardObject(ref));
-    }
-  }
-
-  // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
-  // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
-  template <typename Visitor>
-  void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const
-      NO_THREAD_SAFETY_ANALYSIS {
-    DCHECK(array != nullptr);
-    DCHECK(visitor.IsInAppImage(array));
-    // The bit for the array contents is different than the bit for the array. Since we may have
-    // already visited the array as a long / int array from walking the bitmap without knowing it
-    // was a pointer array.
-    static_assert(kObjectAlignment == 8u, "array bit may be in another object");
-    mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<uintptr_t>(array) + kObjectAlignment);
-    // If the bit is not set then the contents have not yet been updated.
-    if (!visited_->Test(contents_bit)) {
-      array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor);
-      visited_->Set(contents_bit);
-    }
-  }
-
-  // java.lang.ref.Reference visitor.
-  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
-    mirror::Object* obj = ref->GetReferent<kWithoutReadBarrier>();
-    ref->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        mirror::Reference::ReferentOffset(),
-        ForwardObject(obj));
-  }
-
-  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    if (visited_->Test(obj)) {
-      // Already visited.
-      return;
-    }
-    visited_->Set(obj);
-
-    // Handle class specially first since we need it to be updated to properly visit the rest of
-    // the instance fields.
+    std::unique_ptr<File> file;
     {
-      mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
-      DCHECK(klass != nullptr) << "Null class in image";
-      // No AsClass since our fields aren't quite fixed up yet.
-      mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
-      if (klass != new_klass) {
-        obj->SetClass<kVerifyNone>(new_klass);
+      TimingLogger::ScopedTiming timing("OpenImageFile", &logger);
+      file.reset(OS::OpenFileForReading(image_filename));
+      if (file == nullptr) {
+        *error_msg = StringPrintf("Failed to open '%s'", image_filename);
+        return nullptr;
       }
-      if (new_klass != klass && IsInAppImage(new_klass)) {
-        // Make sure the klass contents are fixed up since we depend on it to walk the fields.
-        operator()(new_klass);
+    }
+    ImageHeader temp_image_header;
+    ImageHeader* image_header = &temp_image_header;
+    {
+      TimingLogger::ScopedTiming timing("ReadImageHeader", &logger);
+      bool success = file->ReadFully(image_header, sizeof(*image_header));
+      if (!success || !image_header->IsValid()) {
+        *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
+        return nullptr;
+      }
+    }
+    // Check that the file is larger or equal to the header size + data size.
+    const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
+    if (image_file_size < sizeof(ImageHeader) + image_header->GetDataSize()) {
+      *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
+                                image_file_size,
+                                sizeof(ImageHeader) + image_header->GetDataSize());
+      return nullptr;
+    }
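+    // Note that GetDataSize() is the size of the image data as stored in the file (i.e. the
+    // compressed size when an LZ4 storage mode is used), so this check holds for compressed and
+    // uncompressed images alike.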
+
+    if (oat_file != nullptr) {
+      // If we have an oat file, check the oat file checksum. The oat file is only non-null for the
+      // app image case. Otherwise, we open the oat file after the image and check the checksum there.
+      const uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
+      const uint32_t image_oat_checksum = image_header->GetOatChecksum();
+      if (oat_checksum != image_oat_checksum) {
+        *error_msg = StringPrintf("Oat checksum 0x%x does not match the image one 0x%x in image %s",
+                                  oat_checksum,
+                                  image_oat_checksum,
+                                  image_filename);
+        return nullptr;
       }
     }
 
-    obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
-        *this,
-        *this);
-    // Note that this code relies on no circular dependencies.
-    // We want to use our own class loader and not the one in the image.
-    if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
-      mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
-      FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
-      as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass,
-                                                                      pointer_size_,
-                                                                      visitor);
-      // Deal with the pointer arrays. Use the helper function since multiple classes can reference
-      // the same arrays.
-      mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
-      if (vtable != nullptr && IsInAppImage(vtable)) {
-        operator()(vtable);
-        UpdatePointerArrayContents(vtable, visitor);
+    if (VLOG_IS_ON(startup)) {
+      LOG(INFO) << "Dumping image sections";
+      for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
+        const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
+        auto& section = image_header->GetImageSection(section_idx);
+        LOG(INFO) << section_idx << " start="
+            << reinterpret_cast<void*>(image_header->GetImageBegin() + section.Offset()) << " "
+            << section;
       }
-      mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
-      // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
-      // contents.
-      if (iftable != nullptr && IsInAppImage(iftable)) {
-        operator()(iftable);
-        for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
-          if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
-            mirror::PointerArray* methods =
-                iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
-            if (visitor.IsInAppImage(methods)) {
-              operator()(methods);
-              DCHECK(methods != nullptr);
-              UpdatePointerArrayContents(methods, visitor);
+    }
+
+    const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
+    // The location we want to map from is the first aligned page after the end of the stored
+    // (possibly compressed) data.
+    const size_t image_bitmap_offset = RoundUp(sizeof(ImageHeader) + image_header->GetDataSize(),
+                                               kPageSize);
+    const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
+    if (end_of_bitmap != image_file_size) {
+      *error_msg = StringPrintf(
+          "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
+          end_of_bitmap);
+      return nullptr;
+    }
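+    // At this point the expected file layout is:
+    //   [ImageHeader][stored image data][padding up to a page boundary][image bitmap]
+    // with the bitmap ending exactly at the end of the file.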
+
+    std::unique_ptr<MemMap> map;
+    // GetImageBegin is the preferred address to map the image. If we manage to map the
+    // image at the image begin, the amount of fixup work required is minimized.
+    map.reset(LoadImageFile(image_filename,
+                            image_location,
+                            *image_header,
+                            image_header->GetImageBegin(),
+                            file->Fd(),
+                            logger,
+                            error_msg));
+    // If the header specifies PIC mode, we can also map at a random low_4gb address since we can
+    // relocate in-place.
+    if (map == nullptr && image_header->IsPic()) {
+      map.reset(LoadImageFile(image_filename,
+                              image_location,
+                              *image_header,
+                              /* address */ nullptr,
+                              file->Fd(),
+                              logger,
+                              error_msg));
+    }
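+    // Non-PIC images get no fallback: the only mapping attempt is at the compiled image begin,
+    // and a failure there is reported just below.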
+    // Were we able to load something and continue?
+    if (map == nullptr) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
+    DCHECK_EQ(0, memcmp(image_header, map->Begin(), sizeof(ImageHeader)));
+
+    std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
+                                                                      bitmap_section.Size(),
+                                                                      PROT_READ, MAP_PRIVATE,
+                                                                      file->Fd(),
+                                                                      image_bitmap_offset,
+                                                                      /*low_4gb*/false,
+                                                                      /*reuse*/false,
+                                                                      image_filename,
+                                                                      error_msg));
+    if (image_bitmap_map == nullptr) {
+      *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
+      return nullptr;
+    }
+    // The map is loaded; from here on use the image header inside the mapping, since
+    // RelocateInPlace may patch it.
+    image_header = reinterpret_cast<ImageHeader*>(map->Begin());
+    const uint32_t bitmap_index = ImageSpace::bitmap_index_.FetchAndAddSequentiallyConsistent(1);
+    std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u",
+                                         image_filename,
+                                         bitmap_index));
+    // Bitmap only needs to cover until the end of the mirror objects section.
+    const ImageSection& image_objects = image_header->GetImageSection(ImageHeader::kSectionObjects);
+    // We only want the mirror objects, not the ArtFields and ArtMethods.
+    uint8_t* const image_end = map->Begin() + image_objects.End();
+    std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap;
+    {
+      TimingLogger::ScopedTiming timing("CreateImageBitmap", &logger);
+      bitmap.reset(
+          accounting::ContinuousSpaceBitmap::CreateFromMemMap(
+              bitmap_name,
+              image_bitmap_map.release(),
+              reinterpret_cast<uint8_t*>(map->Begin()),
+              image_objects.End()));
+      if (bitmap == nullptr) {
+        *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
+        return nullptr;
+      }
+    }
+    {
+      TimingLogger::ScopedTiming timing("RelocateImage", &logger);
+      if (!RelocateInPlace(*image_header,
+                           map->Begin(),
+                           bitmap.get(),
+                           oat_file,
+                           error_msg)) {
+        return nullptr;
+      }
+    }
+    // We only want the mirror objects, not the ArtFields and ArtMethods.
+    std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename,
+                                                     image_location,
+                                                     map.release(),
+                                                     bitmap.release(),
+                                                     image_end));
+
+    // VerifyImageAllocations() will be called later in Runtime::Init()
+    // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
+    // and ArtField::java_lang_reflect_ArtField_, which are used from
+    // Object::SizeOf() which VerifyImageAllocations() calls, are not
+    // set yet at this point.
+    if (oat_file == nullptr) {
+      TimingLogger::ScopedTiming timing("OpenOatFile", &logger);
+      space->oat_file_ = OpenOatFile(*space, image_filename, error_msg);
+      if (space->oat_file_ == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+      space->oat_file_non_owned_ = space->oat_file_.get();
+    } else {
+      space->oat_file_non_owned_ = oat_file;
+    }
+
+    if (validate_oat_file) {
+      TimingLogger::ScopedTiming timing("ValidateOatFile", &logger);
+      CHECK(space->oat_file_ != nullptr);
+      if (!ValidateOatFile(*space, *space->oat_file_, error_msg)) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+    }
+
+    Runtime* runtime = Runtime::Current();
+
+    // If oat_file is null, then it is the boot image space. Use oat_file_non_owned_ from the space
+    // to set the runtime methods.
+    CHECK_EQ(oat_file != nullptr, image_header->IsAppImage());
+    if (image_header->IsAppImage()) {
+      CHECK_EQ(runtime->GetResolutionMethod(),
+               image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+      CHECK_EQ(runtime->GetImtConflictMethod(),
+               image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
+      CHECK_EQ(runtime->GetImtUnimplementedMethod(),
+               image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves),
+               image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly),
+               image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs),
+               image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod));
+      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything),
+               image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
+    } else if (!runtime->HasResolutionMethod()) {
+      runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
+      runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
+      runtime->SetImtConflictMethod(image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
+      runtime->SetImtUnimplementedMethod(
+          image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod),
+          Runtime::kSaveAllCalleeSaves);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod), Runtime::kSaveRefsOnly);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod),
+          Runtime::kSaveRefsAndArgs);
+      runtime->SetCalleeSaveMethod(
+          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything);
+    }
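+    // For any further boot image space (e.g. a multi-image boot), neither branch above runs:
+    // the image is not an app image and the runtime methods were installed by the first space.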
+
+    VLOG(image) << "ImageSpace::Init exiting " << *space.get();
+    if (VLOG_IS_ON(image)) {
+      logger.Dump(LOG(INFO));
+    }
+    return space;
+  }
+
+ private:
+  static MemMap* LoadImageFile(const char* image_filename,
+                               const char* image_location,
+                               const ImageHeader& image_header,
+                               uint8_t* address,
+                               int fd,
+                               TimingLogger& logger,
+                               std::string* error_msg) {
+    TimingLogger::ScopedTiming timing("MapImageFile", &logger);
+    const ImageHeader::StorageMode storage_mode = image_header.GetStorageMode();
+    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+      return MemMap::MapFileAtAddress(address,
+                                      image_header.GetImageSize(),
+                                      PROT_READ | PROT_WRITE,
+                                      MAP_PRIVATE,
+                                      fd,
+                                      0,
+                                      /*low_4gb*/true,
+                                      /*reuse*/false,
+                                      image_filename,
+                                      error_msg);
+    }
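+    // Uncompressed images are mapped directly (copy-on-write). The rest of this function handles
+    // the LZ4-compressed storage modes, which need an anonymous mapping plus decompression.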
+
+    if (storage_mode != ImageHeader::kStorageModeLZ4 &&
+        storage_mode != ImageHeader::kStorageModeLZ4HC) {
+      *error_msg = StringPrintf("Invalid storage mode in image header %d",
+                                static_cast<int>(storage_mode));
+      return nullptr;
+    }
+
+    // Reserve output and decompress into it.
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous(image_location,
+                                                     address,
+                                                     image_header.GetImageSize(),
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     error_msg));
+    if (map != nullptr) {
+      const size_t stored_size = image_header.GetDataSize();
+      const size_t decompress_offset = sizeof(ImageHeader);  // Skip the header.
+      std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
+                                                       PROT_READ,
+                                                       MAP_PRIVATE,
+                                                       fd,
+                                                       /*offset*/0,
+                                                       /*low_4gb*/false,
+                                                       image_filename,
+                                                       error_msg));
+      if (temp_map == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+      memcpy(map->Begin(), &image_header, sizeof(ImageHeader));
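+      // The header is never stored compressed; copy the header we already read from the file and
+      // decompress only the payload that follows it.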
+      const uint64_t start = NanoTime();
+      // LZ4HC and LZ4 share the same internal format; both are decoded with LZ4_decompress_safe.
+      TimingLogger::ScopedTiming timing2("LZ4 decompress image", &logger);
+      const size_t decompressed_size = LZ4_decompress_safe(
+          reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
+          reinterpret_cast<char*>(map->Begin()) + decompress_offset,
+          stored_size,
+          map->Size() - decompress_offset);
+      VLOG(image) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
+      if (decompressed_size + sizeof(ImageHeader) != image_header.GetImageSize()) {
+        *error_msg = StringPrintf(
+            "Decompressed size does not match expected image size %zu vs %zu",
+            decompressed_size + sizeof(ImageHeader),
+            image_header.GetImageSize());
+        return nullptr;
+      }
+    }
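+    // If the anonymous reservation failed, map is null here and MapAnonymous has already set
+    // error_msg; releasing the null map below simply returns nullptr to the caller.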
+
+    return map.release();
+  }
+
+  class FixupVisitor : public ValueObject {
+   public:
+    FixupVisitor(const RelocationRange& boot_image,
+                 const RelocationRange& boot_oat,
+                 const RelocationRange& app_image,
+                 const RelocationRange& app_oat)
+        : boot_image_(boot_image),
+          boot_oat_(boot_oat),
+          app_image_(app_image),
+          app_oat_(app_oat) {}
+
+    // Return the relocated address of a heap object.
+    template <typename T>
+    ALWAYS_INLINE T* ForwardObject(T* src) const {
+      const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+      if (boot_image_.InSource(uint_src)) {
+        return reinterpret_cast<T*>(boot_image_.ToDest(uint_src));
+      }
+      if (app_image_.InSource(uint_src)) {
+        return reinterpret_cast<T*>(app_image_.ToDest(uint_src));
+      }
+      // Since we are fixing up the app image, there should only be pointers to the app image and
+      // boot image.
+      DCHECK(src == nullptr) << reinterpret_cast<const void*>(src);
+      return src;
+    }
+
+    // Return the relocated address of a code pointer (contained by an oat file).
+    ALWAYS_INLINE const void* ForwardCode(const void* src) const {
+      const uintptr_t uint_src = reinterpret_cast<uintptr_t>(src);
+      if (boot_oat_.InSource(uint_src)) {
+        return reinterpret_cast<const void*>(boot_oat_.ToDest(uint_src));
+      }
+      if (app_oat_.InSource(uint_src)) {
+        return reinterpret_cast<const void*>(app_oat_.ToDest(uint_src));
+      }
+      DCHECK(src == nullptr) << src;
+      return src;
+    }
+
+    // Must be called on pointers that already have been relocated to the destination relocation.
+    ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+      return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+    }
+
+   protected:
+    // Source section.
+    const RelocationRange boot_image_;
+    const RelocationRange boot_oat_;
+    const RelocationRange app_image_;
+    const RelocationRange app_oat_;
+  };
+
+  // Adapt for mirror::Class::FixupNativePointers.
+  class FixupObjectAdapter : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
+
+    template <typename T>
+    T* operator()(T* obj) const {
+      return ForwardObject(obj);
+    }
+  };
+
+  class FixupRootVisitor : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupRootVisitor(Args... args) : FixupVisitor(args...) {}
+
+    ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      if (!root->IsNull()) {
+        VisitRoot(root);
+      }
+    }
+
+    ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      mirror::Object* ref = root->AsMirrorPtr();
+      mirror::Object* new_ref = ForwardObject(ref);
+      if (ref != new_ref) {
+        root->Assign(new_ref);
+      }
+    }
+  };
+
+  class FixupObjectVisitor : public FixupVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited,
+                                const PointerSize pointer_size,
+                                Args... args)
+        : FixupVisitor(args...),
+          pointer_size_(pointer_size),
+          visited_(visited) {}
+
+    // Fix up separately since we also need to fix up method entrypoints.
+    ALWAYS_INLINE void VisitRootIfNonNull(
+        mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
+
+    ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
+        const {}
+
+    ALWAYS_INLINE void operator()(mirror::Object* obj,
+                                  MemberOffset offset,
+                                  bool is_static ATTRIBUTE_UNUSED) const
+        NO_THREAD_SAFETY_ANALYSIS {
+      // There could be overlap between ranges, we must avoid visiting the same reference twice.
+      // Avoid the class field since we already fixed it up in FixupClassVisitor.
+      if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
+        // Space is not yet added to the heap, don't do a read barrier.
+        mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+            offset);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+        // image.
+        obj->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(offset, ForwardObject(ref));
+      }
+    }
+
+    // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
+    // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
+    template <typename Visitor>
+    void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const
+        NO_THREAD_SAFETY_ANALYSIS {
+      DCHECK(array != nullptr);
+      DCHECK(visitor.IsInAppImage(array));
+      // The bit for the array contents is different from the bit for the array itself, since we
+      // may have already visited the array as a long / int array from walking the bitmap without
+      // knowing it was a pointer array.
+      static_assert(kObjectAlignment == 8u, "array bit may be in another object");
+      mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>(
+          reinterpret_cast<uintptr_t>(array) + kObjectAlignment);
+      // If the bit is not set then the contents have not yet been updated.
+      if (!visited_->Test(contents_bit)) {
+        array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor);
+        visited_->Set(contents_bit);
+      }
+    }
+
+    // java.lang.ref.Reference visitor.
+    void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+        SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+      mirror::Object* obj = ref->GetReferent<kWithoutReadBarrier>();
+      ref->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
+          mirror::Reference::ReferentOffset(),
+          ForwardObject(obj));
+    }
+
+    void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+      if (visited_->Test(obj)) {
+        // Already visited.
+        return;
+      }
+      visited_->Set(obj);
+
+      // Handle class specially first since we need it to be updated to properly visit the rest of
+      // the instance fields.
+      {
+        mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+        DCHECK(klass != nullptr) << "Null class in image";
+        // No AsClass since our fields aren't quite fixed up yet.
+        mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
+        if (klass != new_klass) {
+          obj->SetClass<kVerifyNone>(new_klass);
+        }
+        if (new_klass != klass && IsInAppImage(new_klass)) {
+          // Make sure the klass contents are fixed up since we depend on it to walk the fields.
+          operator()(new_klass);
+        }
+      }
+
+      obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
+          *this,
+          *this);
+      // Note that this code relies on no circular dependencies.
+      // We want to use our own class loader and not the one in the image.
+      if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
+        mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+        FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
+        as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass,
+                                                                        pointer_size_,
+                                                                        visitor);
+        // Deal with the pointer arrays. Use the helper function since multiple classes can reference
+        // the same arrays.
+        mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
+        if (vtable != nullptr && IsInAppImage(vtable)) {
+          operator()(vtable);
+          UpdatePointerArrayContents(vtable, visitor);
+        }
+        mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
+        // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
+        // contents.
+        if (iftable != nullptr && IsInAppImage(iftable)) {
+          operator()(iftable);
+          for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
+            if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
+              mirror::PointerArray* methods =
+                  iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
+              if (visitor.IsInAppImage(methods)) {
+                operator()(methods);
+                DCHECK(methods != nullptr);
+                UpdatePointerArrayContents(methods, visitor);
+              }
             }
           }
         }
       }
     }
-  }
 
- private:
-  const size_t pointer_size_;
-  gc::accounting::ContinuousSpaceBitmap* const visited_;
-};
+   private:
+    const PointerSize pointer_size_;
+    gc::accounting::ContinuousSpaceBitmap* const visited_;
+  };
 
-class ForwardObjectAdapter {
- public:
-  ALWAYS_INLINE ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
+  class ForwardObjectAdapter {
+   public:
+    ALWAYS_INLINE explicit ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
 
-  template <typename T>
-  ALWAYS_INLINE T* operator()(T* src) const {
-    return visitor_->ForwardObject(src);
-  }
-
- private:
-  const FixupVisitor* const visitor_;
-};
-
-class ForwardCodeAdapter {
- public:
-  ALWAYS_INLINE ForwardCodeAdapter(const FixupVisitor* visitor)
-      : visitor_(visitor) {}
-
-  template <typename T>
-  ALWAYS_INLINE T* operator()(T* src) const {
-    return visitor_->ForwardCode(src);
-  }
-
- private:
-  const FixupVisitor* const visitor_;
-};
-
-class FixupArtMethodVisitor : public FixupVisitor, public ArtMethodVisitor {
- public:
-  template<typename... Args>
-  explicit FixupArtMethodVisitor(bool fixup_heap_objects, size_t pointer_size, Args... args)
-      : FixupVisitor(args...),
-        fixup_heap_objects_(fixup_heap_objects),
-        pointer_size_(pointer_size) {}
-
-  virtual void Visit(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-    // TODO: Separate visitor for runtime vs normal methods.
-    if (UNLIKELY(method->IsRuntimeMethod())) {
-      ImtConflictTable* table = method->GetImtConflictTable(pointer_size_);
-      if (table != nullptr) {
-        ImtConflictTable* new_table = ForwardObject(table);
-        if (table != new_table) {
-          method->SetImtConflictTable(new_table, pointer_size_);
-        }
-      }
-      const void* old_code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
-      const void* new_code = ForwardCode(old_code);
-      if (old_code != new_code) {
-        method->SetEntryPointFromQuickCompiledCodePtrSize(new_code, pointer_size_);
-      }
-    } else {
-      if (fixup_heap_objects_) {
-        method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this), pointer_size_);
-      }
-      method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this), pointer_size_);
+    template <typename T>
+    ALWAYS_INLINE T* operator()(T* src) const {
+      return visitor_->ForwardObject(src);
     }
-  }
 
- private:
-  const bool fixup_heap_objects_;
-  const size_t pointer_size_;
-};
+   private:
+    const FixupVisitor* const visitor_;
+  };
 
-class FixupArtFieldVisitor : public FixupVisitor, public ArtFieldVisitor {
- public:
-  template<typename... Args>
-  explicit FixupArtFieldVisitor(Args... args) : FixupVisitor(args...) {}
+  class ForwardCodeAdapter {
+   public:
+    ALWAYS_INLINE explicit ForwardCodeAdapter(const FixupVisitor* visitor)
+        : visitor_(visitor) {}
 
-  virtual void Visit(ArtField* field) NO_THREAD_SAFETY_ANALYSIS {
-    field->UpdateObjects(ForwardObjectAdapter(this));
-  }
-};
+    template <typename T>
+    ALWAYS_INLINE T* operator()(T* src) const {
+      return visitor_->ForwardCode(src);
+    }
 
-// Relocate an image space mapped at target_base which possibly used to be at a different base
-// address. Only needs a single image space, not one for both source and destination.
-// In place means modifying a single ImageSpace in place rather than relocating from one ImageSpace
-// to another.
-static bool RelocateInPlace(ImageHeader& image_header,
-                            uint8_t* target_base,
-                            accounting::ContinuousSpaceBitmap* bitmap,
-                            const OatFile* app_oat_file,
-                            std::string* error_msg) {
-  DCHECK(error_msg != nullptr);
-  if (!image_header.IsPic()) {
-    if (image_header.GetImageBegin() == target_base) {
+   private:
+    const FixupVisitor* const visitor_;
+  };
+
+  class FixupArtMethodVisitor : public FixupVisitor, public ArtMethodVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupArtMethodVisitor(bool fixup_heap_objects, PointerSize pointer_size, Args... args)
+        : FixupVisitor(args...),
+          fixup_heap_objects_(fixup_heap_objects),
+          pointer_size_(pointer_size) {}
+
+    virtual void Visit(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
+      // TODO: Separate visitor for runtime vs normal methods.
+      if (UNLIKELY(method->IsRuntimeMethod())) {
+        ImtConflictTable* table = method->GetImtConflictTable(pointer_size_);
+        if (table != nullptr) {
+          ImtConflictTable* new_table = ForwardObject(table);
+          if (table != new_table) {
+            method->SetImtConflictTable(new_table, pointer_size_);
+          }
+        }
+        const void* old_code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+        const void* new_code = ForwardCode(old_code);
+        if (old_code != new_code) {
+          method->SetEntryPointFromQuickCompiledCodePtrSize(new_code, pointer_size_);
+        }
+      } else {
+        if (fixup_heap_objects_) {
+          method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this), pointer_size_);
+        }
+        method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this), pointer_size_);
+      }
+    }
+
+   private:
+    const bool fixup_heap_objects_;
+    const PointerSize pointer_size_;
+  };
+
+  class FixupArtFieldVisitor : public FixupVisitor, public ArtFieldVisitor {
+   public:
+    template<typename... Args>
+    explicit FixupArtFieldVisitor(Args... args) : FixupVisitor(args...) {}
+
+    virtual void Visit(ArtField* field) NO_THREAD_SAFETY_ANALYSIS {
+      field->UpdateObjects(ForwardObjectAdapter(this));
+    }
+  };
+
+  // Relocate an image space mapped at target_base which possibly used to be at a different base
+  // address. Only needs a single image space, not one for both source and destination.
+  // In place means modifying a single ImageSpace in place rather than relocating from one ImageSpace
+  // to another.
+  static bool RelocateInPlace(ImageHeader& image_header,
+                              uint8_t* target_base,
+                              accounting::ContinuousSpaceBitmap* bitmap,
+                              const OatFile* app_oat_file,
+                              std::string* error_msg) {
+    DCHECK(error_msg != nullptr);
+    if (!image_header.IsPic()) {
+      if (image_header.GetImageBegin() == target_base) {
+        return true;
+      }
+      *error_msg = StringPrintf("Cannot relocate non-pic image for oat file %s",
+                                (app_oat_file != nullptr) ? app_oat_file->GetLocation().c_str() : "");
+      return false;
+    }
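+    // Non-PIC images contain absolute pointers, so they are only usable when mapped at exactly
+    // the base address they were compiled for; everything below assumes a PIC image.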
+    // Set up sections.
+    uint32_t boot_image_begin = 0;
+    uint32_t boot_image_end = 0;
+    uint32_t boot_oat_begin = 0;
+    uint32_t boot_oat_end = 0;
+    const PointerSize pointer_size = image_header.GetPointerSize();
+    gc::Heap* const heap = Runtime::Current()->GetHeap();
+    heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
+    if (boot_image_begin == boot_image_end) {
+      *error_msg = "Can not relocate app image without boot image space";
+      return false;
+    }
+    if (boot_oat_begin == boot_oat_end) {
+      *error_msg = "Can not relocate app image without boot oat file";
+      return false;
+    }
+    const uint32_t boot_image_size = boot_image_end - boot_image_begin;
+    const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
+    const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
+    const uint32_t image_header_boot_oat_size = image_header.GetBootOatSize();
+    if (boot_image_size != image_header_boot_image_size) {
+      *error_msg = StringPrintf("Boot image size %" PRIu64 " does not match expected size %"
+                                    PRIu64,
+                                static_cast<uint64_t>(boot_image_size),
+                                static_cast<uint64_t>(image_header_boot_image_size));
+      return false;
+    }
+    if (boot_oat_size != image_header_boot_oat_size) {
+      *error_msg = StringPrintf("Boot oat size %" PRIu64 " does not match expected size %"
+                                    PRIu64,
+                                static_cast<uint64_t>(boot_oat_size),
+                                static_cast<uint64_t>(image_header_boot_oat_size));
+      return false;
+    }
+    TimingLogger logger(__FUNCTION__, true, false);
+    RelocationRange boot_image(image_header.GetBootImageBegin(),
+                               boot_image_begin,
+                               boot_image_size);
+    RelocationRange boot_oat(image_header.GetBootOatBegin(),
+                             boot_oat_begin,
+                             boot_oat_size);
+    RelocationRange app_image(reinterpret_cast<uintptr_t>(image_header.GetImageBegin()),
+                              reinterpret_cast<uintptr_t>(target_base),
+                              image_header.GetImageSize());
+    // Use the oat data section since this is where the OatFile::Begin is.
+    RelocationRange app_oat(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+                            // Not necessarily in low 4GB.
+                            reinterpret_cast<uintptr_t>(app_oat_file->Begin()),
+                            image_header.GetOatDataEnd() - image_header.GetOatDataBegin());
+    VLOG(image) << "App image " << app_image;
+    VLOG(image) << "App oat " << app_oat;
+    VLOG(image) << "Boot image " << boot_image;
+    VLOG(image) << "Boot oat " << boot_oat;
+    // True if we need to fixup any heap pointers, otherwise only code pointers.
+    const bool fixup_image = boot_image.Delta() != 0 || app_image.Delta() != 0;
+    const bool fixup_code = boot_oat.Delta() != 0 || app_oat.Delta() != 0;
+    if (!fixup_image && !fixup_code) {
+      // Nothing to fix up.
       return true;
     }
-    *error_msg = StringPrintf("Cannot relocate non-pic image for oat file %s",
-                              (app_oat_file != nullptr) ? app_oat_file->GetLocation().c_str() : "");
-    return false;
-  }
-  // Set up sections.
-  uint32_t boot_image_begin = 0;
-  uint32_t boot_image_end = 0;
-  uint32_t boot_oat_begin = 0;
-  uint32_t boot_oat_end = 0;
-  const size_t pointer_size = image_header.GetPointerSize();
-  gc::Heap* const heap = Runtime::Current()->GetHeap();
-  heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
-  if (boot_image_begin == boot_image_end) {
-    *error_msg = "Can not relocate app image without boot image space";
-    return false;
-  }
-  if (boot_oat_begin == boot_oat_end) {
-    *error_msg = "Can not relocate app image without boot oat file";
-    return false;
-  }
-  const uint32_t boot_image_size = boot_image_end - boot_image_begin;
-  const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
-  const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
-  const uint32_t image_header_boot_oat_size = image_header.GetBootOatSize();
-  if (boot_image_size != image_header_boot_image_size) {
-    *error_msg = StringPrintf("Boot image size %" PRIu64 " does not match expected size %"
-                                  PRIu64,
-                              static_cast<uint64_t>(boot_image_size),
-                              static_cast<uint64_t>(image_header_boot_image_size));
-    return false;
-  }
-  if (boot_oat_size != image_header_boot_oat_size) {
-    *error_msg = StringPrintf("Boot oat size %" PRIu64 " does not match expected size %"
-                                  PRIu64,
-                              static_cast<uint64_t>(boot_oat_size),
-                              static_cast<uint64_t>(image_header_boot_oat_size));
-    return false;
-  }
-  TimingLogger logger(__FUNCTION__, true, false);
-  RelocationRange boot_image(image_header.GetBootImageBegin(),
-                             boot_image_begin,
-                             boot_image_size);
-  RelocationRange boot_oat(image_header.GetBootOatBegin(),
-                           boot_oat_begin,
-                           boot_oat_size);
-  RelocationRange app_image(reinterpret_cast<uintptr_t>(image_header.GetImageBegin()),
-                            reinterpret_cast<uintptr_t>(target_base),
-                            image_header.GetImageSize());
-  // Use the oat data section since this is where the OatFile::Begin is.
-  RelocationRange app_oat(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
-                          // Not necessarily in low 4GB.
-                          reinterpret_cast<uintptr_t>(app_oat_file->Begin()),
-                          image_header.GetOatDataEnd() - image_header.GetOatDataBegin());
-  VLOG(image) << "App image " << app_image;
-  VLOG(image) << "App oat " << app_oat;
-  VLOG(image) << "Boot image " << boot_image;
-  VLOG(image) << "Boot oat " << boot_oat;
-  // True if we need to fixup any heap pointers, otherwise only code pointers.
-  const bool fixup_image = boot_image.Delta() != 0 || app_image.Delta() != 0;
-  const bool fixup_code = boot_oat.Delta() != 0 || app_oat.Delta() != 0;
-  if (!fixup_image && !fixup_code) {
-    // Nothing to fix up.
-    return true;
-  }
-  ScopedDebugDisallowReadBarriers sddrb(Thread::Current());
-  // Need to update the image to be at the target base.
-  const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
-  uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
-  uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
-  FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
-  if (fixup_image) {
-    // Two pass approach, fix up all classes first, then fix up non class-objects.
-    // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
-    std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
-        gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap",
-                                                      target_base,
-                                                      image_header.GetImageSize()));
-    FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
-                                            pointer_size,
-                                            boot_image,
-                                            boot_oat,
-                                            app_image,
-                                            app_oat);
-    TimingLogger::ScopedTiming timing("Fixup classes", &logger);
-    // Fixup objects may read fields in the boot image, use the mutator lock here for sanity. Though
-    // its probably not required.
-    ScopedObjectAccess soa(Thread::Current());
-    timing.NewTiming("Fixup objects");
-    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
-    // Fixup image roots.
-    CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
-        image_header.GetImageRoots<kWithoutReadBarrier>())));
-    image_header.RelocateImageObjects(app_image.Delta());
-    CHECK_EQ(image_header.GetImageBegin(), target_base);
-    // Fix up dex cache DexFile pointers.
-    auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
-        AsObjectArray<mirror::DexCache, kVerifyNone, kWithoutReadBarrier>();
-    for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
-      mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i);
-      // Fix up dex cache pointers.
-      GcRoot<mirror::String>* strings = dex_cache->GetStrings();
-      if (strings != nullptr) {
-        GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings);
-        if (strings != new_strings) {
-          dex_cache->SetStrings(new_strings);
+    ScopedDebugDisallowReadBarriers sddrb(Thread::Current());
+    // Need to update the image to be at the target base.
+    const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
+    uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
+    uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
+    FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
+    if (fixup_image) {
+      // Two pass approach, fix up all classes first, then fix up non class-objects.
+      // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
+      std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
+          gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap",
+                                                        target_base,
+                                                        image_header.GetImageSize()));
+      FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
+                                              pointer_size,
+                                              boot_image,
+                                              boot_oat,
+                                              app_image,
+                                              app_oat);
+      TimingLogger::ScopedTiming timing("Fixup classes", &logger);
+      // Fixup objects may read fields in the boot image; use the mutator lock here for sanity,
+      // though it's probably not required.
+      ScopedObjectAccess soa(Thread::Current());
+      timing.NewTiming("Fixup objects");
+      bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
+      // Fixup image roots.
+      CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
+          image_header.GetImageRoots<kWithoutReadBarrier>())));
+      image_header.RelocateImageObjects(app_image.Delta());
+      CHECK_EQ(image_header.GetImageBegin(), target_base);
+      // Fix up dex cache DexFile pointers.
+      auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
+          AsObjectArray<mirror::DexCache, kVerifyNone, kWithoutReadBarrier>();
+      for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
+        mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i);
+        // Fix up dex cache pointers.
+        mirror::StringDexCacheType* strings = dex_cache->GetStrings();
+        if (strings != nullptr) {
+          mirror::StringDexCacheType* new_strings = fixup_adapter.ForwardObject(strings);
+          if (strings != new_strings) {
+            dex_cache->SetStrings(new_strings);
+          }
+          dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
         }
-        dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
-      }
-      GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
-      if (types != nullptr) {
-        GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
-        if (types != new_types) {
-          dex_cache->SetResolvedTypes(new_types);
+        GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
+        if (types != nullptr) {
+          GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
+          if (types != new_types) {
+            dex_cache->SetResolvedTypes(new_types);
+          }
+          dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
         }
-        dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
-      }
-      ArtMethod** methods = dex_cache->GetResolvedMethods();
-      if (methods != nullptr) {
-        ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
-        if (methods != new_methods) {
-          dex_cache->SetResolvedMethods(new_methods);
-        }
-        for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
-          ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, pointer_size);
-          ArtMethod* copy = fixup_adapter.ForwardObject(orig);
-          if (orig != copy) {
-            mirror::DexCache::SetElementPtrSize(new_methods, j, copy, pointer_size);
+        ArtMethod** methods = dex_cache->GetResolvedMethods();
+        if (methods != nullptr) {
+          ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
+          if (methods != new_methods) {
+            dex_cache->SetResolvedMethods(new_methods);
+          }
+          for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
+            ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, pointer_size);
+            ArtMethod* copy = fixup_adapter.ForwardObject(orig);
+            if (orig != copy) {
+              mirror::DexCache::SetElementPtrSize(new_methods, j, copy, pointer_size);
+            }
           }
         }
-      }
-      ArtField** fields = dex_cache->GetResolvedFields();
-      if (fields != nullptr) {
-        ArtField** new_fields = fixup_adapter.ForwardObject(fields);
-        if (fields != new_fields) {
-          dex_cache->SetResolvedFields(new_fields);
-        }
-        for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
-          ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, pointer_size);
-          ArtField* copy = fixup_adapter.ForwardObject(orig);
-          if (orig != copy) {
-            mirror::DexCache::SetElementPtrSize(new_fields, j, copy, pointer_size);
+        ArtField** fields = dex_cache->GetResolvedFields();
+        if (fields != nullptr) {
+          ArtField** new_fields = fixup_adapter.ForwardObject(fields);
+          if (fields != new_fields) {
+            dex_cache->SetResolvedFields(new_fields);
+          }
+          for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
+            ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, pointer_size);
+            ArtField* copy = fixup_adapter.ForwardObject(orig);
+            if (orig != copy) {
+              mirror::DexCache::SetElementPtrSize(new_fields, j, copy, pointer_size);
+            }
           }
         }
       }
     }
-  }
-  {
-    // Only touches objects in the app image, no need for mutator lock.
-    TimingLogger::ScopedTiming timing("Fixup methods", &logger);
-    FixupArtMethodVisitor method_visitor(fixup_image,
-                                         pointer_size,
-                                         boot_image,
-                                         boot_oat,
-                                         app_image,
-                                         app_oat);
-    image_header.VisitPackedArtMethods(&method_visitor, target_base, pointer_size);
-  }
-  if (fixup_image) {
     {
       // Only touches objects in the app image, no need for mutator lock.
-      TimingLogger::ScopedTiming timing("Fixup fields", &logger);
-      FixupArtFieldVisitor field_visitor(boot_image, boot_oat, app_image, app_oat);
-      image_header.VisitPackedArtFields(&field_visitor, target_base);
+      TimingLogger::ScopedTiming timing("Fixup methods", &logger);
+      FixupArtMethodVisitor method_visitor(fixup_image,
+                                           pointer_size,
+                                           boot_image,
+                                           boot_oat,
+                                           app_image,
+                                           app_oat);
+      image_header.VisitPackedArtMethods(&method_visitor, target_base, pointer_size);
     }
-    {
-      TimingLogger::ScopedTiming timing("Fixup imt", &logger);
-      image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
+    if (fixup_image) {
+      {
+        // Only touches objects in the app image, no need for mutator lock.
+        TimingLogger::ScopedTiming timing("Fixup fields", &logger);
+        FixupArtFieldVisitor field_visitor(boot_image, boot_oat, app_image, app_oat);
+        image_header.VisitPackedArtFields(&field_visitor, target_base);
+      }
+      {
+        TimingLogger::ScopedTiming timing("Fixup imt", &logger);
+        image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
+      }
+      {
+        TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
+        image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
+      }
+      // In the app image case, the image methods are actually in the boot image.
+      image_header.RelocateImageMethods(boot_image.Delta());
+      const auto& class_table_section = image_header.GetImageSection(ImageHeader::kSectionClassTable);
+      if (class_table_section.Size() > 0u) {
+        // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+        // This also relies on visit roots not doing any verification which could fail after we update
+        // the roots to be the image addresses.
+        ScopedObjectAccess soa(Thread::Current());
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        ClassTable temp_table;
+        temp_table.ReadFromMemory(target_base + class_table_section.Offset());
+        FixupRootVisitor root_visitor(boot_image, boot_oat, app_image, app_oat);
+        temp_table.VisitRoots(root_visitor);
+      }
     }
-    {
-      TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
-      image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
+    if (VLOG_IS_ON(image)) {
+      logger.Dump(LOG(INFO));
     }
-    // In the app image case, the image methods are actually in the boot image.
-    image_header.RelocateImageMethods(boot_image.Delta());
-    const auto& class_table_section = image_header.GetImageSection(ImageHeader::kSectionClassTable);
-    if (class_table_section.Size() > 0u) {
-      // Note that we require that ReadFromMemory does not make an internal copy of the elements.
-      // This also relies on visit roots not doing any verification which could fail after we update
-      // the roots to be the image addresses.
-      ScopedObjectAccess soa(Thread::Current());
-      WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-      ClassTable temp_table;
-      temp_table.ReadFromMemory(target_base + class_table_section.Offset());
-      FixupRootVisitor root_visitor(boot_image, boot_oat, app_image, app_oat);
-      temp_table.VisitRoots(root_visitor);
-    }
+    return true;
   }
-  if (VLOG_IS_ON(image)) {
-    logger.Dump(LOG(INFO));
-  }
-  return true;
-}
 
-ImageSpace* ImageSpace::Init(const char* image_filename,
-                             const char* image_location,
-                             bool validate_oat_file,
-                             const OatFile* oat_file,
-                             std::string* error_msg) {
-  CHECK(image_filename != nullptr);
-  CHECK(image_location != nullptr);
+  static std::unique_ptr<OatFile> OpenOatFile(const ImageSpace& image,
+                                              const char* image_path,
+                                              std::string* error_msg) {
+    const ImageHeader& image_header = image.GetImageHeader();
+    std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(image_path);
 
-  TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
-  VLOG(image) << "ImageSpace::Init entering image_filename=" << image_filename;
+    CHECK(image_header.GetOatDataBegin() != nullptr);
 
-  std::unique_ptr<File> file;
-  {
-    TimingLogger::ScopedTiming timing("OpenImageFile", &logger);
-    file.reset(OS::OpenFileForReading(image_filename));
-    if (file == nullptr) {
-      *error_msg = StringPrintf("Failed to open '%s'", image_filename);
+    std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_filename,
+                                                    oat_filename,
+                                                    image_header.GetOatDataBegin(),
+                                                    image_header.GetOatFileBegin(),
+                                                    !Runtime::Current()->IsAotCompiler(),
+                                                    /*low_4gb*/false,
+                                                    nullptr,
+                                                    error_msg));
+    if (oat_file == nullptr) {
+      *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
+                                oat_filename.c_str(),
+                                image.GetName(),
+                                error_msg->c_str());
       return nullptr;
     }
-  }
-  ImageHeader temp_image_header;
-  ImageHeader* image_header = &temp_image_header;
-  {
-    TimingLogger::ScopedTiming timing("ReadImageHeader", &logger);
-    bool success = file->ReadFully(image_header, sizeof(*image_header));
-    if (!success || !image_header->IsValid()) {
-      *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
-      return nullptr;
-    }
-  }
-  // Check that the file is larger or equal to the header size + data size.
-  const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
-  if (image_file_size < sizeof(ImageHeader) + image_header->GetDataSize()) {
-    *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
-                              image_file_size,
-                              sizeof(ImageHeader) + image_header->GetDataSize());
-    return nullptr;
-  }
-
-  if (oat_file != nullptr) {
-    // If we have an oat file, check the oat file checksum. The oat file is only non-null for the
-    // app image case. Otherwise, we open the oat file after the image and check the checksum there.
-    const uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
-    const uint32_t image_oat_checksum = image_header->GetOatChecksum();
+    uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
+    uint32_t image_oat_checksum = image_header.GetOatChecksum();
     if (oat_checksum != image_oat_checksum) {
-      *error_msg = StringPrintf("Oat checksum 0x%x does not match the image one 0x%x in image %s",
+      *error_msg = StringPrintf("Failed to match oat file checksum 0x%x to expected oat checksum 0x%x"
+                                " in image %s",
                                 oat_checksum,
                                 image_oat_checksum,
-                                image_filename);
+                                image.GetName());
       return nullptr;
     }
-  }
-
-  if (VLOG_IS_ON(startup)) {
-    LOG(INFO) << "Dumping image sections";
-    for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
-      const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
-      auto& section = image_header->GetImageSection(section_idx);
-      LOG(INFO) << section_idx << " start="
-                << reinterpret_cast<void*>(image_header->GetImageBegin() + section.Offset()) << " "
-                << section;
+    int32_t image_patch_delta = image_header.GetPatchDelta();
+    int32_t oat_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
+    if (oat_patch_delta != image_patch_delta && !image_header.CompilePic()) {
+      // We should have already relocated by this point. Bail out.
+      *error_msg = StringPrintf("Failed to match oat file patch delta %d to expected patch delta %d "
+                                "in image %s",
+                                oat_patch_delta,
+                                image_patch_delta,
+                                image.GetName());
+      return nullptr;
     }
+
+    return oat_file;
   }
 
-  const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
-  // The location we want to map from is the first aligned page after the end of the stored
-  // (possibly compressed) data.
-  const size_t image_bitmap_offset = RoundUp(sizeof(ImageHeader) + image_header->GetDataSize(),
-                                             kPageSize);
-  const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
-  if (end_of_bitmap != image_file_size) {
-    *error_msg = StringPrintf(
-        "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
-        end_of_bitmap);
-    return nullptr;
-  }
-
-  // The preferred address to map the image, null specifies any address. If we manage to map the
-  // image at the image begin, the amount of fixup work required is minimized.
-  std::vector<uint8_t*> addresses(1, image_header->GetImageBegin());
-  if (image_header->IsPic()) {
-    // Can also map at a random low_4gb address since we can relocate in-place.
-    addresses.push_back(nullptr);
-  }
-
-  // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map;
-  std::string temp_error_msg;
-  for (uint8_t* address : addresses) {
-    TimingLogger::ScopedTiming timing("MapImageFile", &logger);
-    // Only care about the error message for the last address in addresses. We want to avoid the
-    // overhead of printing the process maps if we can relocate.
-    std::string* out_error_msg = (address == addresses.back()) ? &temp_error_msg : nullptr;
-    const ImageHeader::StorageMode storage_mode = image_header->GetStorageMode();
-    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
-      map.reset(MemMap::MapFileAtAddress(address,
-                                         image_header->GetImageSize(),
-                                         PROT_READ | PROT_WRITE,
-                                         MAP_PRIVATE,
-                                         file->Fd(),
-                                         0,
-                                         /*low_4gb*/true,
-                                         /*reuse*/false,
-                                         image_filename,
-                                         /*out*/out_error_msg));
-    } else {
-      if (storage_mode != ImageHeader::kStorageModeLZ4 &&
-          storage_mode != ImageHeader::kStorageModeLZ4HC) {
-        *error_msg = StringPrintf("Invalid storage mode in image header %d",
-                                  static_cast<int>(storage_mode));
-        return nullptr;
+  static bool ValidateOatFile(const ImageSpace& space,
+                              const OatFile& oat_file,
+                              std::string* error_msg) {
+    for (const OatFile::OatDexFile* oat_dex_file : oat_file.GetOatDexFiles()) {
+      const std::string& dex_file_location = oat_dex_file->GetDexFileLocation();
+      uint32_t dex_file_location_checksum;
+      if (!DexFile::GetChecksum(dex_file_location.c_str(), &dex_file_location_checksum, error_msg)) {
+        *error_msg = StringPrintf("Failed to get checksum of dex file '%s' referenced by image %s: "
+                                  "%s",
+                                  dex_file_location.c_str(),
+                                  space.GetName(),
+                                  error_msg->c_str());
+        return false;
       }
-      // Reserve output and decompress into it.
-      map.reset(MemMap::MapAnonymous(image_location,
-                                     address,
-                                     image_header->GetImageSize(),
-                                     PROT_READ | PROT_WRITE,
-                                     /*low_4gb*/true,
-                                     /*reuse*/false,
-                                     /*out*/out_error_msg));
-      if (map != nullptr) {
-        const size_t stored_size = image_header->GetDataSize();
-        const size_t decompress_offset = sizeof(ImageHeader);  // Skip the header.
-        std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
-                                                         PROT_READ,
-                                                         MAP_PRIVATE,
-                                                         file->Fd(),
-                                                         /*offset*/0,
-                                                         /*low_4gb*/false,
-                                                         image_filename,
-                                                         out_error_msg));
-        if (temp_map == nullptr) {
-          DCHECK(!out_error_msg->empty());
-          return nullptr;
-        }
-        memcpy(map->Begin(), image_header, sizeof(ImageHeader));
-        const uint64_t start = NanoTime();
-        // LZ4HC and LZ4 have same internal format, both use LZ4_decompress.
-        TimingLogger::ScopedTiming timing2("LZ4 decompress image", &logger);
-        const size_t decompressed_size = LZ4_decompress_safe(
-            reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
-            reinterpret_cast<char*>(map->Begin()) + decompress_offset,
-            stored_size,
-            map->Size() - decompress_offset);
-        VLOG(image) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
-        if (decompressed_size + sizeof(ImageHeader) != image_header->GetImageSize()) {
-          *error_msg = StringPrintf(
-              "Decompressed size does not match expected image size %zu vs %zu",
-              decompressed_size + sizeof(ImageHeader),
-              image_header->GetImageSize());
-          return nullptr;
-        }
+      if (dex_file_location_checksum != oat_dex_file->GetDexFileLocationChecksum()) {
+        *error_msg = StringPrintf("ValidateOatFile found checksum mismatch between oat file '%s' and "
+                                  "dex file '%s' (0x%x != 0x%x)",
+                                  oat_file.GetLocation().c_str(),
+                                  dex_file_location.c_str(),
+                                  oat_dex_file->GetDexFileLocationChecksum(),
+                                  dex_file_location_checksum);
+        return false;
       }
     }
-    if (map != nullptr) {
-      break;
-    }
+    return true;
+  }
+};
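The fixup visitors above (FixupObjectVisitor, FixupArtMethodVisitor, FixupArtFieldVisitor, FixupRootVisitor) all funnel through the same forwarding rule via the FixupObjectAdapter. A minimal sketch of that rule, assuming the RelocationRange helpers InSource() and Delta() used throughout this file (the helper name Forward is illustrative only, not part of the patch):

template <typename T>
T* Forward(T* src, const RelocationRange& boot_range, const RelocationRange& app_range) {
  // Addresses inside the boot image/oat ranges move by the boot delta, addresses inside the
  // app image/oat ranges move by the app delta; everything else is left untouched.
  const uintptr_t address = reinterpret_cast<uintptr_t>(src);
  if (boot_range.InSource(address)) {
    return reinterpret_cast<T*>(address + boot_range.Delta());
  }
  if (app_range.InSource(address)) {
    return reinterpret_cast<T*>(address + app_range.Delta());
  }
  return src;
}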
+
+static constexpr uint64_t kLowSpaceValue = 50 * MB;
+static constexpr uint64_t kTmpFsSentinelValue = 384 * MB;
+
+// Read the free space of the cache partition and make a decision whether to keep the generated
+// image. This is to try to mitigate situations where the system might run out of space later.
+static bool CheckSpace(const std::string& cache_filename, std::string* error_msg) {
+  // Using statvfs vs statvfs64 because of b/18207376, and it is enough for all practical purposes.
+  struct statvfs buf;
+
+  int res = TEMP_FAILURE_RETRY(statvfs(cache_filename.c_str(), &buf));
+  if (res != 0) {
+    // Could not stat. Conservatively tell the system to delete the image.
+    *error_msg = "Could not stat the filesystem, assuming low-memory situation.";
+    return false;
   }
 
-  if (map == nullptr) {
-    DCHECK(!temp_error_msg.empty());
-    *error_msg = temp_error_msg;
-    return nullptr;
-  }
-  DCHECK_EQ(0, memcmp(image_header, map->Begin(), sizeof(ImageHeader)));
+  uint64_t fs_overall_size = buf.f_bsize * static_cast<uint64_t>(buf.f_blocks);
+  // Zygote is privileged, but other things are not. Use bavail.
+  uint64_t fs_free_size = buf.f_bsize * static_cast<uint64_t>(buf.f_bavail);
 
-  std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
-                                                                    bitmap_section.Size(),
-                                                                    PROT_READ, MAP_PRIVATE,
-                                                                    file->Fd(),
-                                                                    image_bitmap_offset,
-                                                                    /*low_4gb*/false,
-                                                                    /*reuse*/false,
-                                                                    image_filename,
-                                                                    error_msg));
-  if (image_bitmap_map == nullptr) {
-    *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
-    return nullptr;
-  }
-  // Loaded the map, use the image header from the file now in case we patch it with
-  // RelocateInPlace.
-  image_header = reinterpret_cast<ImageHeader*>(map->Begin());
-  const uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
-  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u",
-                                       image_filename,
-                                       bitmap_index));
-  // Bitmap only needs to cover until the end of the mirror objects section.
-  const ImageSection& image_objects = image_header->GetImageSection(ImageHeader::kSectionObjects);
-  // We only want the mirror object, not the ArtFields and ArtMethods.
-  uint8_t* const image_end = map->Begin() + image_objects.End();
-  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap;
-  {
-    TimingLogger::ScopedTiming timing("CreateImageBitmap", &logger);
-    bitmap.reset(
-      accounting::ContinuousSpaceBitmap::CreateFromMemMap(
-          bitmap_name,
-          image_bitmap_map.release(),
-          reinterpret_cast<uint8_t*>(map->Begin()),
-          image_objects.End()));
-    if (bitmap == nullptr) {
-      *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
-      return nullptr;
-    }
-  }
-  {
-    TimingLogger::ScopedTiming timing("RelocateImage", &logger);
-    if (!RelocateInPlace(*image_header,
-                         map->Begin(),
-                         bitmap.get(),
-                         oat_file,
-                         error_msg)) {
-      return nullptr;
-    }
-  }
-  // We only want the mirror object, not the ArtFields and ArtMethods.
-  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename,
-                                                   image_location,
-                                                   map.release(),
-                                                   bitmap.release(),
-                                                   image_end));
-
-  // VerifyImageAllocations() will be called later in Runtime::Init()
-  // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
-  // and ArtField::java_lang_reflect_ArtField_, which are used from
-  // Object::SizeOf() which VerifyImageAllocations() calls, are not
-  // set yet at this point.
-  if (oat_file == nullptr) {
-    TimingLogger::ScopedTiming timing("OpenOatFile", &logger);
-    space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
-    if (space->oat_file_ == nullptr) {
-      DCHECK(!error_msg->empty());
-      return nullptr;
-    }
-    space->oat_file_non_owned_ = space->oat_file_.get();
-  } else {
-    space->oat_file_non_owned_ = oat_file;
-  }
-
-  if (validate_oat_file) {
-    TimingLogger::ScopedTiming timing("ValidateOatFile", &logger);
-    if (!space->ValidateOatFile(error_msg)) {
-     DCHECK(!error_msg->empty());
-      return nullptr;
-    }
-  }
-
-  Runtime* runtime = Runtime::Current();
-
-  // If oat_file is null, then it is the boot image space. Use oat_file_non_owned_ from the space
-  // to set the runtime methods.
-  CHECK_EQ(oat_file != nullptr, image_header->IsAppImage());
-  if (image_header->IsAppImage()) {
-    CHECK_EQ(runtime->GetResolutionMethod(),
-             image_header->GetImageMethod(ImageHeader::kResolutionMethod));
-    CHECK_EQ(runtime->GetImtConflictMethod(),
-             image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
-    CHECK_EQ(runtime->GetImtUnimplementedMethod(),
-             image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAll),
-             image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly),
-             image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod));
-    CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs),
-             image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod));
-  } else if (!runtime->HasResolutionMethod()) {
-    runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
-    runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod));
-    runtime->SetImtConflictMethod(image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
-    runtime->SetImtUnimplementedMethod(
-        image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-    runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
-    runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
-    runtime->SetCalleeSaveMethod(
-        image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
-  }
-
-  VLOG(image) << "ImageSpace::Init exiting " << *space.get();
-  if (VLOG_IS_ON(image)) {
-    logger.Dump(LOG(INFO));
-  }
-  return space.release();
-}
-
-OatFile* ImageSpace::OpenOatFile(const char* image_path, std::string* error_msg) const {
-  const ImageHeader& image_header = GetImageHeader();
-  std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(image_path);
-
-  CHECK(image_header.GetOatDataBegin() != nullptr);
-
-  OatFile* oat_file = OatFile::Open(oat_filename,
-                                    oat_filename,
-                                    image_header.GetOatDataBegin(),
-                                    image_header.GetOatFileBegin(),
-                                    !Runtime::Current()->IsAotCompiler(),
-                                    /*low_4gb*/false,
-                                    nullptr,
-                                    error_msg);
-  if (oat_file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
-                              oat_filename.c_str(), GetName(), error_msg->c_str());
-    return nullptr;
-  }
-  uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum();
-  uint32_t image_oat_checksum = image_header.GetOatChecksum();
-  if (oat_checksum != image_oat_checksum) {
-    *error_msg = StringPrintf("Failed to match oat file checksum 0x%x to expected oat checksum 0x%x"
-                              " in image %s", oat_checksum, image_oat_checksum, GetName());
-    return nullptr;
-  }
-  int32_t image_patch_delta = image_header.GetPatchDelta();
-  int32_t oat_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
-  if (oat_patch_delta != image_patch_delta && !image_header.CompilePic()) {
-    // We should have already relocated by this point. Bail out.
-    *error_msg = StringPrintf("Failed to match oat file patch delta %d to expected patch delta %d "
-                              "in image %s", oat_patch_delta, image_patch_delta, GetName());
-    return nullptr;
-  }
-
-  return oat_file;
-}
-
-bool ImageSpace::ValidateOatFile(std::string* error_msg) const {
-  CHECK(oat_file_.get() != nullptr);
-  for (const OatFile::OatDexFile* oat_dex_file : oat_file_->GetOatDexFiles()) {
-    const std::string& dex_file_location = oat_dex_file->GetDexFileLocation();
-    uint32_t dex_file_location_checksum;
-    if (!DexFile::GetChecksum(dex_file_location.c_str(), &dex_file_location_checksum, error_msg)) {
-      *error_msg = StringPrintf("Failed to get checksum of dex file '%s' referenced by image %s: "
-                                "%s", dex_file_location.c_str(), GetName(), error_msg->c_str());
-      return false;
-    }
-    if (dex_file_location_checksum != oat_dex_file->GetDexFileLocationChecksum()) {
-      *error_msg = StringPrintf("ValidateOatFile found checksum mismatch between oat file '%s' and "
-                                "dex file '%s' (0x%x != 0x%x)",
-                                oat_file_->GetLocation().c_str(), dex_file_location.c_str(),
-                                oat_dex_file->GetDexFileLocationChecksum(),
-                                dex_file_location_checksum);
+  // Take the overall size as an indicator for a tmpfs, which is being used for the decryption
+  // environment. We do not want to fail quickening the boot image there, as it is beneficial
+  // for time-to-UI.
+  if (fs_overall_size > kTmpFsSentinelValue) {
+    if (fs_free_size < kLowSpaceValue) {
+      *error_msg = StringPrintf("Low-memory situation: only %4.2f megabytes available, need at "
+                                "least %" PRIu64 ".",
+                                static_cast<double>(fs_free_size) / MB,
+                                kLowSpaceValue / MB);
       return false;
     }
   }
   return true;
 }
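The decision above reduces to two branches. A hypothetical standalone helper (not part of this change) that mirrors the logic, with assumed example values in the trailing comment:

static bool WouldPrune(uint64_t fs_overall_size, uint64_t fs_free_size) {
  if (fs_overall_size > kTmpFsSentinelValue) {
    // A regular cache partition: prune when less than kLowSpaceValue (50 MB) is free.
    return fs_free_size < kLowSpaceValue;
  }
  // Small overall size: assume the decryption tmpfs and never prune because of it.
  return false;
}
// E.g. WouldPrune(256 * MB, 10 * MB) is false, while WouldPrune(2 * GB, 30 * MB) is true.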
 
+std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_location,
+                                                        const InstructionSet image_isa,
+                                                        bool secondary_image,
+                                                        std::string* error_msg) {
+  ScopedTrace trace(__FUNCTION__);
+
+  // Step 0: Extra zygote work.
+
+  // Step 0.a: If we're the zygote, mark boot.
+  const bool is_zygote = Runtime::Current()->IsZygote();
+  if (is_zygote && !secondary_image) {
+    MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
+  }
+
+  // Step 0.b: If we're the zygote, check for free space, and prune the cache preemptively,
+  //           if necessary. While the runtime may be fine (it is pretty tolerant to
+  //           out-of-disk-space situations), other parts of the platform are not.
+  //
+  //           The advantage of doing this proactively is that the later steps are simplified,
+  //           i.e., we do not need to code retries.
+  std::string system_filename;
+  bool has_system = false;
+  std::string cache_filename;
+  bool has_cache = false;
+  bool dalvik_cache_exists = false;
+  bool is_global_cache = true;
+  std::string dalvik_cache;
+  bool found_image = FindImageFilenameImpl(image_location,
+                                           image_isa,
+                                           &has_system,
+                                           &system_filename,
+                                           &dalvik_cache_exists,
+                                           &dalvik_cache,
+                                           &is_global_cache,
+                                           &has_cache,
+                                           &cache_filename);
+
+  if (is_zygote && dalvik_cache_exists) {
+    DCHECK(!dalvik_cache.empty());
+    std::string local_error_msg;
+    if (!CheckSpace(dalvik_cache, &local_error_msg)) {
+      LOG(WARNING) << local_error_msg << " Preemptively pruning the dalvik cache.";
+      PruneDalvikCache(image_isa);
+
+      // Re-evaluate the image.
+      found_image = FindImageFilenameImpl(image_location,
+                                          image_isa,
+                                          &has_system,
+                                          &system_filename,
+                                          &dalvik_cache_exists,
+                                          &dalvik_cache,
+                                          &is_global_cache,
+                                          &has_cache,
+                                          &cache_filename);
+    }
+  }
+
+  // Collect all the errors.
+  std::vector<std::string> error_msgs;
+
+  // Step 1: Check if we have an existing and relocated image.
+
+  // Step 1.a: Have files in system and cache. Then they need to match.
+  if (found_image && has_system && has_cache) {
+    std::string local_error_msg;
+    // Check that the files are matching.
+    if (ChecksumsMatch(system_filename.c_str(), cache_filename.c_str(), &local_error_msg)) {
+      std::unique_ptr<ImageSpace> relocated_space =
+          ImageSpaceLoader::Load(image_location,
+                                 cache_filename,
+                                 is_zygote,
+                                 is_global_cache,
+                                 /* is_system */ false,
+                                 /* relocated_version_used */ true,
+                                 &local_error_msg);
+      if (relocated_space != nullptr) {
+        return relocated_space;
+      }
+    }
+    error_msgs.push_back(local_error_msg);
+  }
+
+  // Step 1.b: Only have a cache file.
+  if (found_image && !has_system && has_cache) {
+    std::string local_error_msg;
+    std::unique_ptr<ImageSpace> cache_space =
+        ImageSpaceLoader::Load(image_location,
+                               cache_filename,
+                               is_zygote,
+                               is_global_cache,
+                               /* is_system */ false,
+                               /* relocated_version_used */ true,
+                               &local_error_msg);
+    if (cache_space != nullptr) {
+      return cache_space;
+    }
+    error_msgs.push_back(local_error_msg);
+  }
+
+  // Step 2: We have an existing image in /system.
+
+  // Step 2.a: We are not required to relocate it. Then we can use it directly.
+  bool relocate = Runtime::Current()->ShouldRelocate();
+
+  if (found_image && has_system && !relocate) {
+    std::string local_error_msg;
+    std::unique_ptr<ImageSpace> system_space =
+        ImageSpaceLoader::Load(image_location,
+                               system_filename,
+                               is_zygote,
+                               is_global_cache,
+                               /* is_system */ true,
+                               /* relocated_version_used */ false,
+                               &local_error_msg);
+    if (system_space != nullptr) {
+      return system_space;
+    }
+    error_msgs.push_back(local_error_msg);
+  }
+
+  // Step 2.b: We require a relocated image. Then we must patch it. This step fails if this is a
+  //           secondary image.
+  if (found_image && has_system && relocate) {
+    std::string local_error_msg;
+    if (!Runtime::Current()->IsImageDex2OatEnabled()) {
+      local_error_msg = "Patching disabled.";
+    } else if (secondary_image) {
+      local_error_msg = "Cannot patch a secondary image.";
+    } else if (ImageCreationAllowed(is_global_cache, &local_error_msg)) {
+      bool patch_success =
+          RelocateImage(image_location, cache_filename.c_str(), image_isa, &local_error_msg);
+      if (patch_success) {
+        std::unique_ptr<ImageSpace> patched_space =
+            ImageSpaceLoader::Load(image_location,
+                                   cache_filename,
+                                   is_zygote,
+                                   is_global_cache,
+                                   /* is_system */ false,
+                                   /* relocated_version_used */ true,
+                                   &local_error_msg);
+        if (patched_space != nullptr) {
+          return patched_space;
+        }
+      }
+    }
+    error_msgs.push_back(StringPrintf("Cannot relocate image %s to %s: %s",
+                                      image_location,
+                                      cache_filename.c_str(),
+                                      local_error_msg.c_str()));
+  }
+
+  // Step 3: We do not have an existing image in /system, so generate an image into the dalvik
+  //         cache. This step fails if this is a secondary image.
+  if (!has_system) {
+    std::string local_error_msg;
+    if (!Runtime::Current()->IsImageDex2OatEnabled()) {
+      local_error_msg = "Image compilation disabled.";
+    } else if (secondary_image) {
+      local_error_msg = "Cannot compile a secondary image.";
+    } else if (ImageCreationAllowed(is_global_cache, &local_error_msg)) {
+      bool compilation_success = GenerateImage(cache_filename, image_isa, &local_error_msg);
+      if (compilation_success) {
+        std::unique_ptr<ImageSpace> compiled_space =
+            ImageSpaceLoader::Load(image_location,
+                                   cache_filename,
+                                   is_zygote,
+                                   is_global_cache,
+                                   /* is_system */ false,
+                                   /* relocated_version_used */ true,
+                                   &local_error_msg);
+        if (compiled_space != nullptr) {
+          return compiled_space;
+        }
+      }
+    }
+    error_msgs.push_back(StringPrintf("Cannot compile image to %s: %s",
+                                      cache_filename.c_str(),
+                                      local_error_msg.c_str()));
+  }
+
+  // We failed. Prune the cache to free up space, create a compound error message, and return no
+  // image.
+  PruneDalvikCache(image_isa);
+
+  std::ostringstream oss;
+  bool first = true;
+  for (const auto& msg : error_msgs) {
+    if (!first) {
+      oss << "\n    ";
+    }
+    first = false;
+    oss << msg;
+  }
+  *error_msg = oss.str();
+
+  return nullptr;
+}
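A sketch of how a caller consumes the new unique_ptr-returning API (the call site below is hypothetical, not part of this patch; the real one lives in the runtime/heap setup and must hold the mutator lock as the declaration requires, and the image location is only an example):

std::string error_msg;
std::unique_ptr<gc::space::ImageSpace> boot_space =
    gc::space::ImageSpace::CreateBootImage("/system/framework/boot.art",
                                           kRuntimeISA,
                                           /* secondary_image */ false,
                                           &error_msg);
if (boot_space == nullptr) {
  LOG(WARNING) << "Could not create boot image space: " << error_msg;
}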
+
+std::unique_ptr<ImageSpace> ImageSpace::CreateFromAppImage(const char* image,
+                                                           const OatFile* oat_file,
+                                                           std::string* error_msg) {
+  return ImageSpaceLoader::Init(image,
+                                image,
+                                /*validate_oat_file*/false,
+                                oat_file,
+                                /*out*/error_msg);
+}
+
 const OatFile* ImageSpace::GetOatFile() const {
   return oat_file_non_owned_;
 }
@@ -1550,16 +1667,6 @@
   }
 }
 
-ImageSpace* ImageSpace::CreateFromAppImage(const char* image,
-                                           const OatFile* oat_file,
-                                           std::string* error_msg) {
-  return gc::space::ImageSpace::Init(image,
-                                     image,
-                                     /*validate_oat_file*/false,
-                                     oat_file,
-                                     /*out*/error_msg);
-}
-
 void ImageSpace::DumpSections(std::ostream& os) const {
   const uint8_t* base = Begin();
   const ImageHeader& header = GetImageHeader();
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index c9741d0..534232d 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -43,24 +43,19 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* CreateBootImage(const char* image,
+  static std::unique_ptr<ImageSpace> CreateBootImage(const char* image,
                                      InstructionSet image_isa,
                                      bool secondary_image,
                                      std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Try to open an existing app image space.
-  static ImageSpace* CreateFromAppImage(const char* image,
-                                        const OatFile* oat_file,
-                                        std::string* error_msg)
+  static std::unique_ptr<ImageSpace> CreateFromAppImage(const char* image,
+                                                        const OatFile* oat_file,
+                                                        std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Reads the image header from the specified image location for the
-  // instruction set image_isa or dies trying.
-  static ImageHeader* ReadImageHeaderOrDie(const char* image_location,
-                                           InstructionSet image_isa);
-
-  // Reads the image header from the specified image location for the
   // instruction set image_isa. Returns null on failure, with
   // reason in error_msg.
   static ImageHeader* ReadImageHeader(const char* image_location,
@@ -158,21 +153,13 @@
   // relative to its DexFile inputs. Otherwise (for /data), validate the inputs and generate the
   // OatFile in /data/dalvik-cache if necessary. If the oat_file is null, it uses the oat file from
   // the image.
-  static ImageSpace* Init(const char* image_filename,
-                          const char* image_location,
-                          bool validate_oat_file,
-                          const OatFile* oat_file,
-                          std::string* error_msg)
+  static std::unique_ptr<ImageSpace> Init(const char* image_filename,
+                                          const char* image_location,
+                                          bool validate_oat_file,
+                                          const OatFile* oat_file,
+                                          std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  OatFile* OpenOatFile(const char* image, std::string* error_msg) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool ValidateOatFile(std::string* error_msg) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  friend class Space;
-
   static Atomic<uint32_t> bitmap_index_;
 
   std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
@@ -194,6 +181,9 @@
 
   const std::string image_location_;
 
+  friend class ImageSpaceLoader;
+  friend class Space;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(ImageSpace);
 };
diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h
index fc9a3cf..fa941c0 100644
--- a/runtime/gc/space/image_space_fs.h
+++ b/runtime/gc/space/image_space_fs.h
@@ -89,9 +89,11 @@
 static void PruneDalvikCache(InstructionSet isa) {
   CHECK_NE(isa, kNone);
   // Prune the base /data/dalvik-cache.
-  impl::DeleteDirectoryContents(GetDalvikCacheOrDie(".", false), false);
+  // Note: GetDalvikCache may return the empty string if the directory doesn't
+  // exist. It is safe to pass "" to DeleteDirectoryContents, so this is okay.
+  impl::DeleteDirectoryContents(GetDalvikCache("."), false);
   // Prune /data/dalvik-cache/<isa>.
-  impl::DeleteDirectoryContents(GetDalvikCacheOrDie(GetInstructionSetString(isa), false), false);
+  impl::DeleteDirectoryContents(GetDalvikCache(GetInstructionSetString(isa)), false);
 
   // Be defensive. There should be a runtime created here, but this may be called in a test.
   if (Runtime::Current() != nullptr) {
@@ -104,7 +106,8 @@
 // present, it usually means the boot didn't complete. We wipe the entire dalvik
 // cache if that's the case.
 static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_boots) {
-  const std::string isa_subdir = GetDalvikCacheOrDie(GetInstructionSetString(isa), false);
+  const std::string isa_subdir = GetDalvikCache(GetInstructionSetString(isa));
+  CHECK(!isa_subdir.empty()) << "Dalvik cache not found";
   const std::string boot_marker = isa_subdir + "/.booting";
   const char* file_name = boot_marker.c_str();
 
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 4e56c4a..c6b2870 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -39,7 +39,7 @@
     int rc = call args; \
     if (UNLIKELY(rc != 0)) { \
       errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
+      PLOG(FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
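Parenthesizing the macro argument matters because operator<< binds tighter than the conditional operator an argument may contain; a small hypothetical demo (not from this change):

#include <iostream>

int main() {
  bool is_map = true;
  // Parenthesized, as the macro now streams (what): prints "failed for map".
  std::cout << "failed for " << (is_map ? "map" : "unmap") << std::endl;
  // Unparenthesized, the same expression would parse as
  //   (std::cout << "failed for " << is_map) ? "map" : "unmap";
  // streaming the bool ("1") and discarding both string literals.
  return 0;
}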
 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 9a2d0c6..2d71294 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -126,15 +126,20 @@
   } else {
     bool is_live_percent_valid = live_bytes_ != static_cast<size_t>(-1);
     if (is_live_percent_valid) {
-      uint live_percent = GetLivePercent();
+      DCHECK(IsInToSpace());
+      DCHECK(!IsLargeTail());
+      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
+      DCHECK_LE(live_bytes_, BytesAllocated());
+      const size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
+      DCHECK_LE(live_bytes_, bytes_allocated);
       if (IsAllocated()) {
         // Side note: live_percent == 0 does not necessarily mean
         // there are no live objects, due to rounding (there may be a
         // few).
-        result = live_percent < kEvaculateLivePercentThreshold;
+        result = live_bytes_ * 100U < kEvaculateLivePercentThreshold * bytes_allocated;
       } else {
         DCHECK(IsLarge());
-        result = live_percent == 0U;
+        result = live_bytes_ == 0U;
       }
     } else {
       result = false;
@@ -216,17 +221,6 @@
   evac_region_ = nullptr;
 }
 
-void RegionSpace::AssertAllRegionLiveBytesZeroOrCleared() {
-  if (kIsDebugBuild) {
-    MutexLock mu(Thread::Current(), region_lock_);
-    for (size_t i = 0; i < num_regions_; ++i) {
-      Region* r = &regions_[i];
-      size_t live_bytes = r->LiveBytes();
-      CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
-    }
-  }
-}
-
 void RegionSpace::LogFragmentationAllocFailure(std::ostream& os,
                                                size_t /* failed_alloc_bytes */) {
   size_t max_contiguous_allocation = 0;
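The rewritten evacuation test is the old GetLivePercent() comparison with the division cross-multiplied away, so the rounding step disappears; a sketch with assumed numbers (the threshold value is illustrative):

// floor((live * 100) / allocated) < threshold  is equivalent to  live * 100 < threshold * allocated
// for non-negative integers, so the decision is unchanged while the division is avoided.
const size_t live_bytes = 3 * KB;         // live data found by the GC
const size_t bytes_allocated = 256 * KB;  // BytesAllocated() rounded up to kRegionSize
const size_t threshold = 75;              // assumed value for kEvaculateLivePercentThreshold
const bool evacuate = live_bytes * 100U < threshold * bytes_allocated;  // 307200 < 19660800 -> true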
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 14e8005..823aa38 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -215,7 +215,16 @@
     reg->AddLiveBytes(alloc_size);
   }
 
-  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_);
+  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_) {
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), region_lock_);
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        size_t live_bytes = r->LiveBytes();
+        CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
+      }
+    }
+  }
 
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
   bool AllocNewTlab(Thread* self) REQUIRES(!region_lock_);
@@ -386,18 +395,6 @@
       return live_bytes_;
     }
 
-    uint GetLivePercent() const {
-      DCHECK(IsInToSpace());
-      DCHECK(!IsLargeTail());
-      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
-      DCHECK_LE(live_bytes_, BytesAllocated());
-      size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
-      DCHECK_GE(bytes_allocated, 0U);
-      uint result = (live_bytes_ * 100U) / bytes_allocated;
-      DCHECK_LE(result, 100U);
-      return result;
-    }
-
     size_t BytesAllocated() const {
       if (IsLarge()) {
         DCHECK_LT(begin_ + kRegionSize, top_);
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 3734bcc..0304d0d 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -195,7 +195,8 @@
     return root_.IsNull();
   }
 
-  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE GcRoot() {}
+  explicit ALWAYS_INLINE GcRoot(MirrorType* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
   // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
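Splitting the defaulted-argument constructor into a no-argument constructor and an explicit single-argument one changes what call sites may do; a short sketch of the effect (obj is a hypothetical mirror::Object* in scope):

GcRoot<mirror::Object> empty_root;          // Still compiles: uses the new no-argument constructor.
GcRoot<mirror::Object> root(obj);           // Still compiles: explicit construction from a pointer.
// GcRoot<mirror::Object> implicit = obj;   // No longer compiles: the implicit pointer-to-GcRoot
//                                          // conversion is rejected by the explicit constructor.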
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
new file mode 100644
index 0000000..40b71c4
--- /dev/null
+++ b/runtime/generated/asm_support_gen.h
@@ -0,0 +1,141 @@
+
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+
+#define STACK_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::StackReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE 0x4
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE), (static_cast<size_t>(sizeof(art::mirror::CompressedReference<art::mirror::Object>))))
+#define COMPRESSED_REFERENCE_SIZE_SHIFT 0x2
+DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE_SHIFT), (static_cast<size_t>(art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))))
+#define RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAllCalleeSaves))))
+#define RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET 0x8
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsOnly))))
+#define RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET 0x10
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsAndArgs))))
+#define RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 0x18
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything))))
+#define THREAD_FLAGS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_ID_OFFSET 12
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_ID_OFFSET), (static_cast<int32_t>(art::Thread:: ThinLockIdOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_IS_GC_MARKING_OFFSET 52
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_IS_GC_MARKING_OFFSET), (static_cast<int32_t>(art::Thread:: IsGcMarkingOffset<art::kRuntimePointerSize>().Int32Value())))
+#define THREAD_CARD_TABLE_OFFSET 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CARD_TABLE_OFFSET), (static_cast<int32_t>(art::Thread:: CardTableOffset<art::kRuntimePointerSize>().Int32Value())))
+#define CODEITEM_INSNS_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(CODEITEM_INSNS_OFFSET), (static_cast<int32_t>(__builtin_offsetof(art::DexFile::CodeItem, insns_))))
+#define MIRROR_OBJECT_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_CLASS_OFFSET), (static_cast<int32_t>(art::mirror::Object:: ClassOffset().Int32Value())))
+#define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
+DEFINE_CHECK_EQ(static_cast<int32_t>(MIRROR_OBJECT_LOCK_WORD_OFFSET), (static_cast<int32_t>(art::mirror::Object:: MonitorOffset().Int32Value())))
+#define MIRROR_CLASS_STATUS_INITIALIZED 0xa
+DEFINE_CHECK_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED), (static_cast<uint32_t>((art::mirror::Class::kStatusInitialized))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), (static_cast<uint32_t>((art::kAccClassIsFinalizable))))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 0x1f
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT), (static_cast<uint32_t>((art::MostSignificantBit(art::kAccClassIsFinalizable)))))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_DEX_CACHE_METHODS_OFFSET_64 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_METHODS_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedMethodsOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_32 24
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_DEX_CACHE_TYPES_OFFSET_64 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DEX_CACHE_TYPES_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: DexCacheResolvedTypesOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_32 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_JNI_OFFSET_64 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromJniOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_32 32
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value())))
+#define ART_METHOD_QUICK_CODE_OFFSET_64 48
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
+#define ART_METHOD_DECLARING_CLASS_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DECLARING_CLASS_OFFSET), (static_cast<int32_t>(art::ArtMethod:: DeclaringClassOffset().Int32Value())))
+#define DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET 40
+DEFINE_CHECK_EQ(static_cast<int32_t>(DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET), (static_cast<int32_t>(art::mirror::Class:: DexCacheStringsOffset().Int32Value())))
+#define STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), (static_cast<int32_t>(art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))))
+#define STRING_DEX_CACHE_SIZE_MINUS_ONE 1023
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_SIZE_MINUS_ONE), (static_cast<int32_t>(art::mirror::DexCache::kDexCacheStringCacheSize - 1)))
+#define STRING_DEX_CACHE_HASH_BITS 10
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))))
+#define MIN_LARGE_OBJECT_THRESHOLD 0x3000
+DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
+#define LOCK_WORD_STATE_SHIFT 30
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kStateShift)))
+#define LOCK_WORD_STATE_MASK 0xc0000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
+#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled)))
+#define LOCK_WORD_GC_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift)))
+#define LOCK_WORD_MARK_BIT_SHIFT 29
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
+#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
+#define OBJECT_ALIGNMENT_MASK 0x7
+DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED), (static_cast<uint32_t>(~static_cast<uint32_t>(art::kObjectAlignment - 1))))
+#define OBJECT_ALIGNMENT_MASK_TOGGLED64 0xfffffffffffffff8
+DEFINE_CHECK_EQ(static_cast<uint64_t>(OBJECT_ALIGNMENT_MASK_TOGGLED64), (static_cast<uint64_t>(~static_cast<uint64_t>(art::kObjectAlignment - 1))))
+#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 7
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK), (static_cast<int32_t>((static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff8
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32), (static_cast<uint32_t>((~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff8
+DEFINE_CHECK_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64), (static_cast<uint64_t>((~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1)))))
+#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))))
+#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))))
+#define ROSALLOC_SLOT_NEXT_OFFSET 0
+DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_SLOT_NEXT_OFFSET), (static_cast<int32_t>((art::gc::allocator::RosAlloc::RunSlotNextOffset()))))
+#define THREAD_SUSPEND_REQUEST 1
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_REQUEST), (static_cast<int32_t>((art::kSuspendRequest))))
+#define THREAD_CHECKPOINT_REQUEST 2
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kCheckpointRequest))))
+#define JIT_CHECK_OSR (-1)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
+#define JIT_HOTNESS_DISABLE (-2)
+DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_HOTNESS_DISABLE), (static_cast<int16_t>((art::jit::kJitHotnessDisabled))))
+
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
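Every constant emitted into asm_support_gen.h is paired with a DEFINE_CHECK_EQ line, so any drift between the assembler-visible value and the C++ definition fails the build instead of silently miscompiling the assembly stubs. Below is a minimal sketch, assuming a static_assert-based macro, of how such a check can be realized; the EXAMPLE_* names are hypothetical and the real macro, defined elsewhere in the tree, may differ in wording and placement.

  // Illustrative only: one plausible realization of a DEFINE_CHECK_EQ-style
  // compile-time check. Not the runtime's actual macro definition.
  #include <cstdint>

  #define DEFINE_CHECK_EQ(lhs, rhs) \
    static_assert((lhs) == (rhs), "Constant mismatch: " #lhs " != " #rhs);

  namespace example {
  static constexpr int32_t kStateShift = 30;  // C++ source of truth.
  }
  #define EXAMPLE_LOCK_WORD_STATE_SHIFT 30    // Assembler-visible copy.
  DEFINE_CHECK_EQ(static_cast<int32_t>(EXAMPLE_LOCK_WORD_STATE_SHIFT),
                  static_cast<int32_t>(example::kStateShift))
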
diff --git a/runtime/globals.h b/runtime/globals.h
index e7ea6f3..9045d40 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -40,8 +40,15 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Returns whether the given memory offset can be used for generating
+// an implicit null check.
+static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
+  return offset < kPageSize;
+}
+
 // Required object alignment
-static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignmentShift = 3;
+static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
 
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
@@ -51,11 +58,31 @@
 static constexpr bool kIsDebugBuild = true;
 #endif
 
-// Whether or not this is a target (vs host) build. Useful in conditionals where ART_TARGET isn't.
+// ART_TARGET - Defined for target builds of ART.
+// ART_TARGET_LINUX - Defined for target Linux builds of ART.
+// ART_TARGET_ANDROID - Defined for target Android builds of ART.
+// Note: Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be set when ART_TARGET is set.
+// Note: When ART_TARGET_LINUX is defined, mem_map.h will not use Ashmem for memory mappings
+// (usually only available on Android kernels).
 #if defined(ART_TARGET)
+// Useful in conditionals where ART_TARGET isn't.
 static constexpr bool kIsTargetBuild = true;
+#if defined(ART_TARGET_LINUX)
+static constexpr bool kIsTargetLinux = true;
+#elif defined(ART_TARGET_ANDROID)
+static constexpr bool kIsTargetLinux = false;
+#else
+#error "Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be defined for target builds."
+#endif
 #else
 static constexpr bool kIsTargetBuild = false;
+#if defined(ART_TARGET_LINUX)
+#error "ART_TARGET_LINUX defined for host build."
+#elif defined(ART_TARGET_ANDROID)
+#error "ART_TARGET_ANDROID defined for host build."
+#else
+static constexpr bool kIsTargetLinux = false;
+#endif
 #endif
 
 // Garbage collector constants.
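CanDoImplicitNullCheckOn() captures the usual guard-page trick: a load or store at null-plus-offset is only guaranteed to fault while the offset stays inside the unmapped page at address zero, so offsets below kPageSize can rely on the fault handler instead of an explicit null test. A self-contained sketch of the same predicate with a hypothetical compiler-side caller follows; kPageSize here is a local stand-in for the constant in globals.h.

  #include <cstddef>
  #include <cstdint>

  static constexpr size_t kPageSize = 4096;  // Local stand-in.

  // Mirrors the predicate added to globals.h: a faulting access at
  // (nullptr + offset) only hits the zero page while offset < kPageSize.
  static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
    return offset < kPageSize;
  }

  // Hypothetical compiler-side use: large field offsets (e.g. deep into a
  // very large object) must fall back to an explicit null test.
  bool NeedsExplicitNullCheck(uintptr_t field_offset) {
    return !CanDoImplicitNullCheckOn(field_offset);
  }
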
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index ca206ef..2e1b8ed 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -57,9 +57,9 @@
   return header_size + data_size;
 }
 
-inline size_t HandleScope::SizeOf(size_t pointer_size, uint32_t num_references) {
+inline size_t HandleScope::SizeOf(PointerSize pointer_size, uint32_t num_references) {
   // Assume that the layout is packed.
-  size_t header_size = pointer_size + sizeof(number_of_references_);
+  size_t header_size = ReferencesOffset(pointer_size);
   size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
   return header_size + data_size;
 }
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index d53a0e4..67d7054 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -19,6 +19,7 @@
 
 #include <stack>
 
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "handle.h"
@@ -52,7 +53,7 @@
   static size_t SizeOf(uint32_t num_references);
 
   // Returns the size of a HandleScope containing num_references handles.
-  static size_t SizeOf(size_t pointer_size, uint32_t num_references);
+  static size_t SizeOf(PointerSize pointer_size, uint32_t num_references);
 
   // Link to previous HandleScope or null.
   HandleScope* GetLink() const {
@@ -73,18 +74,18 @@
   ALWAYS_INLINE bool Contains(StackReference<mirror::Object>* handle_scope_entry) const;
 
   // Offset of link within HandleScope, used by generated code.
-  static size_t LinkOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
+  static constexpr size_t LinkOffset(PointerSize pointer_size ATTRIBUTE_UNUSED) {
     return 0;
   }
 
   // Offset of length within handle scope, used by generated code.
-  static size_t NumberOfReferencesOffset(size_t pointer_size) {
-    return pointer_size;
+  static constexpr size_t NumberOfReferencesOffset(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
   // Offset of link within handle scope, used by generated code.
-  static size_t ReferencesOffset(size_t pointer_size) {
-    return pointer_size + sizeof(number_of_references_);
+  static constexpr size_t ReferencesOffset(PointerSize pointer_size) {
+    return NumberOfReferencesOffset(pointer_size) + sizeof(number_of_references_);
   }
 
   // Placement new creation.
@@ -96,7 +97,7 @@
  protected:
   // Return backing storage used for references.
   ALWAYS_INLINE StackReference<mirror::Object>* GetReferences() const {
-    uintptr_t address = reinterpret_cast<uintptr_t>(this) + ReferencesOffset(sizeof(void*));
+    uintptr_t address = reinterpret_cast<uintptr_t>(this) + ReferencesOffset(kRuntimePointerSize);
     return reinterpret_cast<StackReference<mirror::Object>*>(address);
   }
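With ReferencesOffset() now defined on top of NumberOfReferencesOffset(), the packed layout is: link pointer, then a 32-bit reference count, then the StackReference array, which is exactly what the reworked SizeOf(PointerSize, ...) in handle_scope-inl.h computes. A back-of-the-envelope check of that layout for the 32-bit case follows; the constants are local to the sketch and assume 4-byte StackReference entries, as the handle-scope test below also does.

  #include <cstddef>
  #include <cstdint>

  int main() {
    constexpr size_t kPointerSize = 4;                                 // PointerSize::k32
    constexpr size_t kLinkOffset = 0;                                  // LinkOffset()
    constexpr size_t kNumRefsOffset = kPointerSize;                    // NumberOfReferencesOffset()
    constexpr size_t kRefsOffset = kNumRefsOffset + sizeof(uint32_t);  // ReferencesOffset()
    constexpr uint32_t kNumRefs = 3;
    constexpr size_t kSizeOf = kRefsOffset + kNumRefs * sizeof(uint32_t);  // SizeOf(k32, 3)

    static_assert(kLinkOffset == 0, "link comes first");
    static_assert(kRefsOffset == 8, "4-byte link + 4-byte count");
    static_assert(kSizeOf == 20, "header plus three 4-byte references");
    return 0;
  }
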
 
diff --git a/runtime/handle_scope_test.cc b/runtime/handle_scope_test.cc
index dc99987..58f3800 100644
--- a/runtime/handle_scope_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "base/enums.h"
 #include "gtest/gtest.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change.h"
@@ -48,13 +49,13 @@
 
   {
     uintptr_t* link_ptr = reinterpret_cast<uintptr_t*>(table_base_ptr +
-        HandleScope::LinkOffset(sizeof(void*)));
+        HandleScope::LinkOffset(kRuntimePointerSize));
     EXPECT_EQ(*link_ptr, static_cast<size_t>(0x5678));
   }
 
   {
     uint32_t* num_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        HandleScope::NumberOfReferencesOffset(sizeof(void*)));
+        HandleScope::NumberOfReferencesOffset(kRuntimePointerSize));
     EXPECT_EQ(*num_ptr, static_cast<size_t>(0x9ABC));
   }
 
@@ -64,7 +65,7 @@
     EXPECT_EQ(sizeof(StackReference<mirror::Object>), sizeof(uint32_t));
 
     uint32_t* ref_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        HandleScope::ReferencesOffset(sizeof(void*)));
+        HandleScope::ReferencesOffset(kRuntimePointerSize));
     EXPECT_EQ(*ref_ptr, static_cast<uint32_t>(0x1234));
   }
 }
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index cd0557a..28620db 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -48,7 +48,7 @@
 template <typename Visitor>
 inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
                                              uint8_t* base,
-                                             size_t pointer_size) const {
+                                             PointerSize pointer_size) const {
   const ImageSection& section = GetImageSection(kSectionImTables);
   for (size_t pos = 0; pos < section.Size();) {
     ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
@@ -66,7 +66,7 @@
 template <typename Visitor>
 inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
                                                       uint8_t* base,
-                                                      size_t pointer_size) const {
+                                                      PointerSize pointer_size) const {
   const ImageSection& section = GetImageSection(kSectionIMTConflictTables);
   for (size_t pos = 0; pos < section.Size(); ) {
     auto* table = reinterpret_cast<ImtConflictTable*>(base + section.Offset() + pos);
diff --git a/runtime/image.cc b/runtime/image.cc
index 2362a92..6888183 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -160,7 +160,7 @@
 
 void ImageHeader::VisitPackedArtMethods(ArtMethodVisitor* visitor,
                                         uint8_t* base,
-                                        size_t pointer_size) const {
+                                        PointerSize pointer_size) const {
   const size_t method_alignment = ArtMethod::Alignment(pointer_size);
   const size_t method_size = ArtMethod::Size(pointer_size);
   const ImageSection& methods = GetMethodsSection();
diff --git a/runtime/image.h b/runtime/image.h
index 06f06ee..9ff18d6 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -19,6 +19,7 @@
 
 #include <string.h>
 
+#include "base/enums.h"
 #include "globals.h"
 #include "mirror/object.h"
 
@@ -156,7 +157,11 @@
     return reinterpret_cast<uint8_t*>(oat_file_end_);
   }
 
-  uint32_t GetPointerSize() const {
+  PointerSize GetPointerSize() const {
+    return ConvertToPointerSize(pointer_size_);
+  }
+
+  uint32_t GetPointerSizeUnchecked() const {
     return pointer_size_;
   }
 
@@ -178,9 +183,10 @@
     kResolutionMethod,
     kImtConflictMethod,
     kImtUnimplementedMethod,
-    kCalleeSaveMethod,
-    kRefsOnlySaveMethod,
-    kRefsAndArgsSaveMethod,
+    kSaveAllCalleeSavesMethod,
+    kSaveRefsOnlyMethod,
+    kSaveRefsAndArgsMethod,
+    kSaveEverythingMethod,
     kImageMethodsCount,  // Number of elements in enum.
   };
 
@@ -273,7 +279,9 @@
 
   // Visit ArtMethods in the section starting at base. Includes runtime methods.
   // TODO: Delete base parameter if it is always equal to GetImageBegin.
-  void VisitPackedArtMethods(ArtMethodVisitor* visitor, uint8_t* base, size_t pointer_size) const;
+  void VisitPackedArtMethods(ArtMethodVisitor* visitor,
+                             uint8_t* base,
+                             PointerSize pointer_size) const;
 
   // Visit ArtMethods in the section starting at base.
   // TODO: Delete base parameter if it is always equal to GetImageBegin.
@@ -282,12 +290,12 @@
   template <typename Visitor>
   void VisitPackedImTables(const Visitor& visitor,
                            uint8_t* base,
-                           size_t pointer_size) const;
+                           PointerSize pointer_size) const;
 
   template <typename Visitor>
   void VisitPackedImtConflictTables(const Visitor& visitor,
                                     uint8_t* base,
-                                    size_t pointer_size) const;
+                                    PointerSize pointer_size) const;
 
  private:
   static const uint8_t kImageMagic[4];
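GetPointerSize() now returns the PointerSize enum by converting the raw pointer_size_ field on the checked path, while GetPointerSizeUnchecked() keeps the plain accessor for callers that must tolerate a not-yet-validated header. A sketch of what such a validating conversion can look like; the real ConvertToPointerSize lives in base/enums.h and may handle the failure case differently.

  #include <cstdint>
  #include <cstdlib>
  #include <iostream>

  enum class PointerSize : uint32_t { k32 = 4, k64 = 8 };  // Local stand-in.

  // Hypothetical validating conversion: only 4 and 8 are meaningful pointer
  // sizes; anything else indicates a corrupt or truncated image header.
  inline PointerSize ConvertToPointerSizeSketch(uint32_t raw) {
    if (raw == 4u) return PointerSize::k32;
    if (raw == 8u) return PointerSize::k64;
    std::cerr << "Invalid pointer size in image header: " << raw << std::endl;
    std::abort();
  }
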
diff --git a/runtime/imtable.h b/runtime/imtable.h
index 51faf70..2416621 100644
--- a/runtime/imtable.h
+++ b/runtime/imtable.h
@@ -32,10 +32,10 @@
   // (non-marker) interfaces.
   static constexpr size_t kSize = IMT_SIZE;
 
-  ArtMethod* Get(size_t index, size_t pointer_size) {
+  ArtMethod* Get(size_t index, PointerSize pointer_size) {
     DCHECK_LT(index, kSize);
     uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
       return reinterpret_cast<ArtMethod*>(value);
     } else {
@@ -44,10 +44,10 @@
     }
   }
 
-  void Set(size_t index, ArtMethod* method, size_t pointer_size) {
+  void Set(size_t index, ArtMethod* method, PointerSize pointer_size) {
     DCHECK_LT(index, kSize);
     uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       uintptr_t value = reinterpret_cast<uintptr_t>(method);
       DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we don't lose any non-zero bits.
       *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
@@ -56,18 +56,18 @@
     }
   }
 
-  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
-    return index * pointer_size;
+  static size_t OffsetOfElement(size_t index, PointerSize pointer_size) {
+    return index * static_cast<size_t>(pointer_size);
   }
 
-  void Populate(ArtMethod** data, size_t pointer_size) {
+  void Populate(ArtMethod** data, PointerSize pointer_size) {
     for (size_t i = 0; i < kSize; ++i) {
       Set(i, data[i], pointer_size);
     }
   }
 
-  constexpr static size_t SizeInBytes(size_t pointer_size) {
-    return kSize * pointer_size;
+  constexpr static size_t SizeInBytes(PointerSize pointer_size) {
+    return kSize * static_cast<size_t>(pointer_size);
   }
 };
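Get(), Set(), OffsetOfElement() and SizeInBytes() now take the strongly typed PointerSize enum, so a raw sizeof(void*) can no longer be passed where the image pointer size was meant. A small sketch of that pattern; the enum below only mirrors base/enums.h in spirit and is local to the example.

  #include <cstddef>

  // Local stand-in for the PointerSize enum from base/enums.h.
  enum class PointerSize : size_t { k32 = 4, k64 = 8 };

  constexpr size_t OffsetOfElement(size_t index, PointerSize pointer_size) {
    return index * static_cast<size_t>(pointer_size);
  }

  static_assert(OffsetOfElement(5, PointerSize::k32) == 20, "32-bit slot stride");
  static_assert(OffsetOfElement(5, PointerSize::k64) == 40, "64-bit slot stride");
  // OffsetOfElement(5, sizeof(void*)) no longer compiles: size_t does not
  // implicitly convert to the enum class.
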
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 61119f8..4a86e36 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -94,7 +94,7 @@
     // We need the class to be resolved to install/uninstall stubs. Otherwise its methods
     // could not be initialized or linked with regards to class inheritance.
   } else {
-    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
       InstallStubsForMethod(&method);
     }
   }
@@ -356,7 +356,7 @@
             LOG(INFO) << "  Removing exit stub in " << DescribeLocation();
           }
           if (instrumentation_frame.interpreter_entry_) {
-            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
+            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
           } else {
             CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
           }
@@ -886,7 +886,7 @@
   ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
-const void* Instrumentation::GetQuickCodeFor(ArtMethod* method, size_t pointer_size) const {
+const void* Instrumentation::GetQuickCodeFor(ArtMethod* method, PointerSize pointer_size) const {
   Runtime* runtime = Runtime::Current();
   if (LIKELY(!instrumentation_stubs_installed_)) {
     const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
@@ -1063,7 +1063,7 @@
 
   ArtMethod* method = instrumentation_frame.method_;
   uint32_t length;
-  const size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
   JValue return_value;
   if (return_shorty == 'V') {
@@ -1088,7 +1088,7 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
-  if (deoptimize) {
+  if (deoptimize && Runtime::Current()->IsDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
                                 PrettyMethod(visitor.caller).c_str(),
@@ -1110,7 +1110,7 @@
   }
 }
 
-void Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
+uintptr_t Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1134,6 +1134,7 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  return instrumentation_frame.return_pc_;
 }
 
 std::string InstrumentationStackFrame::Dump() const {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index ce6ead4..757be8e 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -22,6 +22,7 @@
 #include <unordered_set>
 
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
@@ -234,7 +235,7 @@
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
-  const void* GetQuickCodeFor(ArtMethod* method, size_t pointer_size) const
+  const void* GetQuickCodeFor(ArtMethod* method, PointerSize pointer_size) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ForceInterpretOnly() {
@@ -402,7 +403,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
-  void PopMethodForUnwind(Thread* self, bool is_deoptimization) const
+  // Returns the return pc for the instrumentation frame that's popped.
+  uintptr_t PopMethodForUnwind(Thread* self, bool is_deoptimization) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Call back for configure stubs.
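PopMethodForUnwind() now hands the popped frame's return pc back to the caller; together with the IsDeoptimizeable() guard added in instrumentation.cc, this lets the unwinder skip deoptimization when the frame would return into code that cannot be deoptimized. A hypothetical caller built from stand-in types to show the intended flow:

  #include <cstdint>

  // Minimal stand-ins so the flow compiles; they only echo the shapes of the
  // Instrumentation and Runtime calls touched by this change.
  struct FakeInstrumentation {
    uintptr_t PopMethodForUnwind(bool /* is_deoptimization */) { return 0x1234; }
  };
  struct FakeRuntime {
    bool IsDeoptimizeable(uintptr_t /* pc */) const { return false; }
  };

  bool ShouldDeoptimizeAfterUnwind(FakeInstrumentation& instr, const FakeRuntime& runtime) {
    // The popped frame's return pc now flows into the deoptimizeability check.
    uintptr_t return_pc = instr.PopMethodForUnwind(/* is_deoptimization= */ false);
    return runtime.IsDeoptimizeable(return_pc);
  }
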
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 56e3bc5..684c471 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -16,6 +16,7 @@
 
 #include "instrumentation.h"
 
+#include "base/enums.h"
 #include "common_runtime_test.h"
 #include "common_throws.h"
 #include "class_linker-inl.h"
@@ -461,7 +462,7 @@
   mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
   ASSERT_TRUE(klass != nullptr);
   ArtMethod* method_to_deoptimize = klass->FindDeclaredDirectMethod("instanceMethod", "()V",
-                                                                    sizeof(void*));
+                                                                    kRuntimePointerSize);
   ASSERT_TRUE(method_to_deoptimize != nullptr);
 
   EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
@@ -508,7 +509,7 @@
   mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
   ASSERT_TRUE(klass != nullptr);
   ArtMethod* method_to_deoptimize = klass->FindDeclaredDirectMethod("instanceMethod", "()V",
-                                                                    sizeof(void*));
+                                                                    kRuntimePointerSize);
   ASSERT_TRUE(method_to_deoptimize != nullptr);
 
   EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 8c42b3a..101c9a1 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -20,6 +20,9 @@
 
 #include "common_throws.h"
 #include "interpreter_common.h"
+#include "interpreter_goto_table_impl.h"
+#include "interpreter_mterp_impl.h"
+#include "interpreter_switch_impl.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -242,28 +245,6 @@
 
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 
-#if defined(__clang__)
-// Clang 3.4 fails to build the goto interpreter implementation.
-template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
-  LOG(FATAL) << "UNREACHABLE";
-  UNREACHABLE();
-}
-// Explicit definitions of ExecuteGotoImpl.
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self,  const DexFile::CodeItem* code_item,
-                                   ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_REQUIRES(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                    ShadowFrame& shadow_frame, JValue result_register);
-#endif
-
 static inline JValue Execute(
     Thread* self,
     const DexFile::CodeItem* code_item,
@@ -324,7 +305,7 @@
       } else {
         while (true) {
           // Mterp does not support all instrumentation/debugging.
-          if (MterpShouldSwitchInterpreters()) {
+          if (MterpShouldSwitchInterpreters() != 0) {
             return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
                                                    false);
           }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 53d5e43..ac146b3 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -18,6 +18,7 @@
 
 #include <cmath>
 
+#include "base/enums.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "jit/jit.h"
@@ -29,9 +30,6 @@
 namespace art {
 namespace interpreter {
 
-// All lambda closures have to be a consecutive pair of virtual registers.
-static constexpr size_t kLambdaVirtualRegisterWidth = 2;
-
 void ThrowNullPointerExceptionFromInterpreter() {
   ThrowNullPointerExceptionFromDexPC();
 }
@@ -537,7 +535,7 @@
   }
   method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                  (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
+                 result, method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty());
 }
 
 void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
@@ -656,7 +654,8 @@
     // As a special case for proxy methods, which are not dex-backed,
     // we have to retrieve type information from the proxy's method
     // interface method instead (which is dex backed since proxies are never interfaces).
-    ArtMethod* method = new_shadow_frame->GetMethod()->GetInterfaceMethodIfProxy(sizeof(void*));
+    ArtMethod* method =
+        new_shadow_frame->GetMethod()->GetInterfaceMethodIfProxy(kRuntimePointerSize);
 
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
@@ -686,7 +685,7 @@
         case 'L': {
           Object* o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
-            size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+            PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
             Class* arg_type =
                 method->GetClassFromTypeIndex(
                     params->GetTypeItem(shorty_pos).type_idx_, true /* resolve */, pointer_size);
@@ -730,7 +729,6 @@
 
     // Fast path: no extra checks.
     if (is_range) {
-      // TODO: Implement the range version of invoke-lambda
       uint16_t first_src_reg = vregC;
 
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
@@ -770,34 +768,6 @@
 }
 
 template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data ATTRIBUTE_UNUSED, JValue* result) {
-  const uint4_t num_additional_registers = inst->VRegB_25x();
-  // Argument word count.
-  const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
-  // The lambda closure register is always present and is not encoded in the count.
-  // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs.
-
-  // TODO: find a cleaner way to separate non-range and range information without duplicating
-  //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs25x];  // only used in invoke-XXX.
-  uint32_t vregC = 0;   // only used in invoke-XXX-range.
-  if (is_range) {
-    vregC = inst->VRegC_3rc();
-  } else {
-    // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
-    inst->GetAllArgs25x(arg);
-  }
-
-  // TODO: if there's an assignability check, throw instead?
-  DCHECK(called_method->IsStatic());
-
-  return DoCallCommon<is_range, do_assignability_check>(
-      called_method, self, shadow_frame,
-      result, number_of_inputs, arg, vregC);
-}
-
-template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Argument word count.
@@ -945,20 +915,6 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
-// Explicit DoLambdaCall template function declarations.
-#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)               \
-  template SHARED_REQUIRES(Locks::mutator_lock_)                                                \
-  bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self,        \
-                                                        ShadowFrame& shadow_frame,              \
-                                                        const Instruction* inst,                \
-                                                        uint16_t inst_data,                     \
-                                                        JValue* result)
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false);
-EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true);
-#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL
-
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                                  \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 3750b7a..ce3b1eb 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -23,9 +23,11 @@
 
 #include <iostream>
 #include <sstream>
+#include <atomic>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker-inl.h"
@@ -35,13 +37,8 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
 #include "jit/jit.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/box_table.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder-inl.h"
-#include "lambda/leaking_allocator.h"
-#include "lambda/shorty_field_type.h"
 #include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -68,21 +65,6 @@
 namespace art {
 namespace interpreter {
 
-// External references to all interpreter implementations.
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                                ShadowFrame& shadow_frame, JValue result_register,
-                                bool interpret_one_instruction);
-
-template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
-                              ShadowFrame& shadow_frame, JValue result_register);
-
-// Mterp does not support transactions or access check, thus no templated versions.
-extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
-                                 ShadowFrame* shadow_frame, JValue* result_register);
-
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -141,488 +123,7 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
-// Invokes the given lambda closure. This is part of the invocation support and is used by
-// DoLambdaInvoke functions.
-// Returns true on success, otherwise throws an exception and returns false.
-template<bool is_range, bool do_assignability_check>
-bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data, JValue* result);
-
-// Validates that the art method corresponding to a lambda method target
-// is semantically valid:
-//
-// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation
-// (i.e. not native, not proxy, not abstract, ...).
-//
-// If the validation fails, return false and raise an exception.
-static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  bool success = false;
-
-  if (UNLIKELY(called_method == nullptr)) {
-    // The shadow frame should already be pushed, so we don't need to update it.
-  } else if (UNLIKELY(!called_method->IsInvokable())) {
-    called_method->ThrowInvocationTimeError();
-    // We got an error.
-    // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
-    // TODO(iam): Also make sure that ACC_LAMBDA is set.
-  } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
-    // Method could be native, proxy method, etc. Lambda targets have to be concrete impls,
-    // so don't allow this.
-  } else {
-    success = true;
-  }
-
-  return success;
-}
-
-// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong.
-static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
-                                               const lambda::Closure& lambda_closure,
-                                               uint32_t vreg) {
-  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&lambda_closure));
-  uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&lambda_closure)
-                                                    >> BitSizeOf<uint32_t>());
-  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-
-  DCHECK_NE(closure_lo | closure_hi, 0u);
-
-  shadow_frame.SetVReg(vreg, closure_lo);
-  shadow_frame.SetVReg(vreg + 1, closure_hi);
-}
-
-// Handles create-lambda instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-//
-// The closure must be allocated big enough to hold the data, and should not be
-// pre-initialized. It is initialized with the actual captured variables as a side-effect,
-// although this should be unimportant to the caller since this function also handles storing it to
-// the ShadowFrame.
-//
-// As a work-in-progress implementation, this shoves the ArtMethod object corresponding
-// to the target dex method index into the target register vA and vA + 1.
-template<bool do_access_check>
-static inline bool DoCreateLambda(Thread* self,
-                                  const Instruction* inst,
-                                  /*inout*/ShadowFrame& shadow_frame,
-                                  /*inout*/lambda::ClosureBuilder* closure_builder,
-                                  /*inout*/lambda::Closure* uninitialized_closure) {
-  DCHECK(closure_builder != nullptr);
-  DCHECK(uninitialized_closure != nullptr);
-  DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure));
-
-  using lambda::ArtLambdaMethod;
-  using lambda::LeakingAllocator;
-
-  /*
-   * create-lambda is opcode 0x21c
-   * - vA is the target register where the closure will be stored into
-   *   (also stores into vA + 1)
-   * - vB is the method index which will be the target for a later invoke-lambda
-   */
-  const uint32_t method_idx = inst->VRegB_21c();
-  mirror::Object* receiver = nullptr;  // Always static. (see 'kStatic')
-  ArtMethod* sf_method = shadow_frame.GetMethod();
-  ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
-      method_idx, &receiver, sf_method, self);
-
-  uint32_t vreg_dest_closure = inst->VRegA_21c();
-
-  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(vreg_dest_closure, 0u);
-    shadow_frame.SetVReg(vreg_dest_closure + 1, 0u);
-    return false;
-  }
-
-  ArtLambdaMethod* initialized_lambda_method;
-  // Initialize the ArtLambdaMethod with the right data.
-  {
-    // Allocate enough memory to store a well-aligned ArtLambdaMethod.
-    // This is not the final type yet since the data starts out uninitialized.
-    LeakingAllocator::AlignedMemoryStorage<ArtLambdaMethod>* uninitialized_lambda_method =
-            LeakingAllocator::AllocateMemory<ArtLambdaMethod>(self);
-
-    std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes();
-    std::string captured_variables_long_type_desc;
-
-    // Synthesize a long type descriptor from the short one.
-    for (char shorty : captured_variables_shorty) {
-      lambda::ShortyFieldType shorty_field_type(shorty);
-      if (shorty_field_type.IsObject()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Object;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable";
-      } else if (shorty_field_type.IsLambda()) {
-        // Not the true type, but good enough until we implement verifier support.
-        captured_variables_long_type_desc += "Ljava/lang/Runnable;";
-        UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable";
-      } else {
-        // The primitive types have the same length shorty or not, so this is always correct.
-        DCHECK(shorty_field_type.IsPrimitive());
-        captured_variables_long_type_desc += shorty_field_type;
-      }
-    }
-
-    // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later.
-    // TODO: Strings need to come from the DexFile, so they won't need their own allocations.
-    char* captured_variables_type_desc = LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_long_type_desc.size() + 1);
-    strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str());
-    char* captured_variables_shorty_copy = LeakingAllocator::MakeFlexibleInstance<char>(
-        self,
-        captured_variables_shorty.size() + 1);
-    strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str());
-
-    // After initialization, the object at the storage is well-typed. Use strong type going forward.
-    initialized_lambda_method =
-        new (uninitialized_lambda_method) ArtLambdaMethod(called_method,
-                                                          captured_variables_type_desc,
-                                                          captured_variables_shorty_copy,
-                                                          true);  // innate lambda
-  }
-
-  // Write all the closure captured variables and the closure header into the closure.
-  lambda::Closure* initialized_closure =
-      closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method);
-
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, *initialized_closure, vreg_dest_closure);
-  return true;
-}
-
-// Reads out the 'ArtMethod*' stored inside of vreg and vreg+1
-//
-// Validates that the art method points to a valid lambda function, otherwise throws
-// an exception and returns null.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
-                                                                 uint32_t vreg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  // Lambda closures take up a consecutive pair of 2 virtual registers.
-  // On 32-bit the high bits are always 0.
-  uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
-  uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
-
-  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
-                           | vc_value_lo;
-
-  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr);
-  DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure));
-
-  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return nullptr;
-  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) {
-    // Sanity check against data corruption.
-    return nullptr;
-  }
-
-  return lambda_closure;
-}
-
-// Forward declaration for lock annotations. See below for documentation.
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-// Find the c-string data corresponding to a dex file's string index.
-// Otherwise, returns null if not found and throws a VerifyError.
-//
-// Note that with do_access_check=false, we never return null because the verifier
-// must guard against invalid string indices.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template <bool do_access_check>
-static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
-                                                               uint32_t string_idx) {
-  ArtMethod* method = shadow_frame.GetMethod();
-  const DexFile* dex_file = method->GetDexFile();
-
-  mirror::Class* declaring_class = method->GetDeclaringClass();
-  if (!do_access_check) {
-    // MethodVerifier refuses methods with string_idx out of bounds.
-    DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  } else {
-    // Access checks enabled: perform string index bounds ourselves.
-    if (string_idx >= dex_file->GetHeader().string_ids_size_) {
-      ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds",
-                       string_idx);
-      return nullptr;
-    }
-  }
-
-  const char* type_string = dex_file->StringDataByIdx(string_idx);
-
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK_EQ(false, do_access_check)
-        << " verifier should've caught invalid string index " << string_idx;
-    CHECK_EQ(true, do_access_check)
-        << " string idx size check should've caught invalid string index " << string_idx;
-  }
-
-  return type_string;
-}
-
-// Handles capture-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoCaptureVariable(Thread* self,
-                                     const Instruction* inst,
-                                     /*inout*/ShadowFrame& shadow_frame,
-                                     /*inout*/lambda::ClosureBuilder* closure_builder) {
-  DCHECK(closure_builder != nullptr);
-  using lambda::ShortyFieldType;
-  /*
-   * capture-variable is opcode 0xf6, fmt 0x21c
-   * - vA is the source register of the variable that will be captured
-   * - vB is the string ID of the variable's type that will be captured
-   */
-  const uint32_t source_vreg = inst->VRegA_21c();
-  const uint32_t string_idx = inst->VRegB_21c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "capture-variable vB must be a valid type");
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  const size_t captured_variable_count = closure_builder->GetCaptureCount();
-
-  // Note: types are specified explicitly so that the closure is packed tightly.
-  switch (shorty_type) {
-    case ShortyFieldType::kBoolean: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kByte: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kChar: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kShort: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kInt: {
-      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
-      closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value);
-      break;
-    }
-    case ShortyFieldType::kDouble: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kFloat: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kLambda: {
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda";
-      // TODO: Capturing lambdas recursively will be done at a later time.
-      UNREACHABLE();
-    }
-    case ShortyFieldType::kLong: {
-      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg));
-      break;
-    }
-    case ShortyFieldType::kObject: {
-      closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg));
-      UNIMPLEMENTED(FATAL) << " capture-variable with type kObject";
-      // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC.
-      UNREACHABLE();
-    }
-
-    default:
-      LOG(FATAL) << "Invalid shorty type value " << shorty_type;
-      UNREACHABLE();
-  }
-
-  DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount());
-
-  return true;
-}
-
-// Handles capture-variable instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-template<bool do_access_check>
-static inline bool DoLiberateVariable(Thread* self,
-                                     const Instruction* inst,
-                                     size_t captured_variable_index,
-                                     /*inout*/ShadowFrame& shadow_frame) {
-  using lambda::ShortyFieldType;
-  /*
-   * liberate-variable is opcode 0xf7, fmt 0x22c
-   * - vA is the destination register
-   * - vB is the register with the lambda closure in it
-   * - vC is the string ID which needs to be a valid field type descriptor
-   */
-
-  const uint32_t dest_vreg = inst->VRegA_22c();
-  const uint32_t closure_vreg = inst->VRegB_22c();
-  const uint32_t string_idx = inst->VRegC_22c();
-  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
-
-
-  // Synthesize a long type descriptor from a shorty type descriptor list.
-  // TODO: Fix the dex encoding to contain the long and short type descriptors.
-  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
-                                                                                  string_idx);
-  if (UNLIKELY(do_access_check && type_string == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  }
-
-  char type_first_letter = type_string[0];
-  ShortyFieldType shorty_type;
-  if (do_access_check &&
-      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable vC must be a valid type");
-    shadow_frame.SetVReg(dest_vreg, 0);
-    return false;
-  } else {
-    // Already verified that the type is valid.
-    shorty_type = ShortyFieldType(type_first_letter);
-  }
-
-  // Check for closure being null *after* the type check.
-  // This way we can access the type info in case we fail later, to know how many vregs to clear.
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  if (do_access_check &&
-      UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) {
-    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "liberate-variable captured variable index %zu out of bounds",
-                     lambda_closure->GetNumberOfCapturedVariables());
-    // Clear the destination vreg(s) to be safe.
-    shadow_frame.SetVReg(dest_vreg, 0);
-    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-      shadow_frame.SetVReg(dest_vreg + 1, 0);
-    }
-    return false;
-  }
-
-  // Verify that the runtime type of the captured-variable matches the requested dex type.
-  if (do_access_check) {
-    ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index);
-    if (actual_type != shorty_type) {
-      ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
-                     "cannot liberate-variable of runtime type '%c' to dex type '%c'",
-                     static_cast<char>(actual_type),
-                     static_cast<char>(shorty_type));
-
-      shadow_frame.SetVReg(dest_vreg, 0);
-      if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
-        shadow_frame.SetVReg(dest_vreg + 1, 0);
-      }
-      return false;
-    }
-
-    if (actual_type.IsLambda() || actual_type.IsObject()) {
-      UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to "
-                           << "parse full type descriptor for objects and lambdas";
-    }
-  }
-
-  // Unpack the captured variable from the closure into the correct type, then save it to the vreg.
-  if (shorty_type.IsPrimitiveNarrow()) {
-    uint32_t primitive_narrow_value =
-        lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index);
-    shadow_frame.SetVReg(dest_vreg, primitive_narrow_value);
-  } else if (shorty_type.IsPrimitiveWide()) {
-      uint64_t primitive_wide_value =
-          lambda_closure->GetCapturedPrimitiveWide(captured_variable_index);
-      shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value));
-  } else if (shorty_type.IsObject()) {
-    mirror::Object* unpacked_object =
-        lambda_closure->GetCapturedObject(captured_variable_index);
-    shadow_frame.SetVRegReference(dest_vreg, unpacked_object);
-
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet";
-  } else if (shorty_type.IsLambda()) {
-    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet";
-  } else {
-    LOG(FATAL) << "unreachable";
-    UNREACHABLE();
-  }
-
-  return true;
-}
-
-template<bool do_access_check>
-static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                                  uint16_t inst_data, JValue* result) {
-  /*
-   * invoke-lambda is opcode 0x25
-   *
-   * - vC is the closure register (both vC and vC + 1 will be used to store the closure).
-   * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4)
-   * - the rest of the registers are always var-args
-   *
-   * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
-   */
-  uint32_t vreg_closure = inst->VRegC_25x();
-  const lambda::Closure* lambda_closure =
-      ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  }
-
-  ArtMethod* const called_method = lambda_closure->GetTargetMethod();
-  // Invoke a non-range lambda
-  return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
-                                              result);
-}
-
-// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]).
+// Handles invoke-XXX/range instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
 static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
@@ -681,7 +182,7 @@
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
-      vtable_idx, sizeof(void*));
+      vtable_idx, kRuntimePointerSize);
   if (UNLIKELY(called_method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
@@ -751,15 +252,20 @@
   ArtMethod* method = shadow_frame.GetMethod();
   mirror::Class* declaring_class = method->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
-  mirror::String* s = declaring_class->GetDexCacheStrings()[string_idx].Read();
-  if (UNLIKELY(s == nullptr)) {
+  DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize,
+            declaring_class->GetDexFile().NumStringIds());
+  mirror::String* string_ptr =
+      mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(),
+                                               string_idx,
+                                               mirror::DexCache::kDexCacheStringCacheSize).Read();
+  if (UNLIKELY(string_ptr == nullptr)) {
     StackHandleScope<1> hs(self);
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-    s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx,
-                                                            dex_cache);
+    string_ptr = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(),
+                                                                     string_idx,
+                                                                     dex_cache);
   }
-  return s;
+  return string_ptr;
 }
 
 // Handles div-int, div-int/2addr, div-int/li16 and div-int/lit8 instructions.
@@ -903,74 +409,6 @@
   return 3;
 }
 
-template <bool _do_check>
-static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                               uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) {
-  /*
-   * box-lambda vA, vB /// opcode 0xf8, format 22x
-   * - vA is the target register where the Object representation of the closure will be stored into
-   * - vB is a closure (made by create-lambda)
-   *   (also reads vB + 1)
-   */
-  uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
-  uint32_t vreg_source_closure = inst->VRegB_22x();
-
-  lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                      vreg_source_closure);
-
-  // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(lambda_closure == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
-
-  // Failed to box the lambda, an exception was raised.
-  if (UNLIKELY(closure_as_object == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  shadow_frame.SetVRegReference(vreg_target_object, closure_as_object);
-  return true;
-}
-
-template <bool _do_check> SHARED_REQUIRES(Locks::mutator_lock_)
-static inline bool DoUnboxLambda(Thread* self,
-                                 ShadowFrame& shadow_frame,
-                                 const Instruction* inst,
-                                 uint16_t inst_data) {
-  /*
-   * unbox-lambda vA, vB, [type id] /// opcode 0xf9, format 22c
-   * - vA is the target register where the closure will be written into
-   *   (also writes vA + 1)
-   * - vB is the Object representation of the closure (made by box-lambda)
-   */
-  uint32_t vreg_target_closure = inst->VRegA_22c(inst_data);
-  uint32_t vreg_source_object = inst->VRegB_22c();
-
-  // Raise NullPointerException if object is null
-  mirror::Object* boxed_closure_object = shadow_frame.GetVRegReference(vreg_source_object);
-  if (UNLIKELY(boxed_closure_object == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    return false;
-  }
-
-  lambda::Closure* unboxed_closure = nullptr;
-  // Raise an exception if unboxing fails.
-  if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            /*out*/&unboxed_closure)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-
-  DCHECK(unboxed_closure != nullptr);
-  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, *unboxed_closure, vreg_target_closure);
-  return true;
-}
-
 uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
     uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
         SHARED_REQUIRES(Locks::mutator_lock_);
@@ -979,15 +417,13 @@
   __attribute__((cold))
   SHARED_REQUIRES(Locks::mutator_lock_);
 
-static inline bool TraceExecutionEnabled() {
-  // Return true if you want TraceExecution invocation before each bytecode execution.
-  return false;
-}
+// Set to true if you want a TraceExecution invocation before each bytecode execution.
+constexpr bool kTraceExecutionEnabled = false;
 
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
                                   const uint32_t dex_pc)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (TraceExecutionEnabled()) {
+  if (kTraceExecutionEnabled) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
@@ -1059,72 +495,6 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
-// Explicitly instantiate all DoCreateLambda functions.
-#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                                 \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                                    \
-bool DoCreateLambda<_do_check>(Thread* self,                                                      \
-                               const Instruction* inst,                                           \
-                               /*inout*/ShadowFrame& shadow_frame,                                \
-                               /*inout*/lambda::ClosureBuilder* closure_builder,                  \
-                               /*inout*/lambda::Closure* uninitialized_closure);
-
-EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
-EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoInvokeLambda functions.
-#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                 \
-bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                               uint16_t inst_data, JValue* result);
-
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(false);  // invoke-lambda
-EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
-#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
-
-// Explicitly instantiate all DoBoxLambda functions.
-#define EXPLICIT_DO_BOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoBoxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                            uint16_t inst_data);
-
-EXPLICIT_DO_BOX_LAMBDA_DECL(false);  // box-lambda
-EXPLICIT_DO_BOX_LAMBDA_DECL(true);   // box-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoUnBoxLambda functions.
-#define EXPLICIT_DO_UNBOX_LAMBDA_DECL(_do_check)                                                \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                            \
-bool DoUnboxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
-                              uint16_t inst_data);
-
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(false);  // unbox-lambda
-EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
-#undef EXPLICIT_DO_BOX_LAMBDA_DECL
-
-// Explicitly instantiate all DoCaptureVariable functions.
-#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoCaptureVariable<_do_check>(Thread* self,                                         \
-                                  const Instruction* inst,                              \
-                                  ShadowFrame& shadow_frame,                            \
-                                  lambda::ClosureBuilder* closure_builder);
-
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false);  // capture-variable
-EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true);   // capture-variable
-#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
-
-// Explicitly instantiate all DoLiberateVariable functions.
-#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check)                                   \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
-bool DoLiberateVariable<_do_check>(Thread* self,                                        \
-                                   const Instruction* inst,                             \
-                                   size_t captured_variable_index,                      \
-                                   ShadowFrame& shadow_frame);                          \
-
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false);  // liberate-variable
-EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true);   // liberate-variable
-#undef EXPLICIT_DO_LIBERATE_LAMBDA_DECL
 }  // namespace interpreter
 }  // namespace art
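
The ResolveString hunk in this file replaces the direct GetDexCacheStrings()[string_idx] load with a probe of a fixed-size, index-keyed cache via mirror::StringDexCachePair::LookupString. A rough C++ sketch of that lookup shape, with illustrative stand-in names rather than the real mirror:: types:

#include <cstdint>

// Illustrative stand-ins; the real types are mirror::String and mirror::StringDexCachePair.
struct StringCachePair {
  const void* string;   // resolved string, or nullptr if this slot is empty
  uint32_t string_idx;  // dex string index the slot was filled for
};

// Probe the fixed-size cache by index modulo its size and only report a hit
// when the slot actually belongs to the requested index.
inline const void* LookupString(const StringCachePair* cache,
                                uint32_t string_idx,
                                uint32_t cache_size) {
  const StringCachePair& slot = cache[string_idx % cache_size];
  return slot.string_idx == string_idx ? slot.string : nullptr;  // nullptr -> slow-path resolve
}

On a miss the interpreter still falls back to ClassLinker::ResolveString, exactly as in the hunk above.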
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index f03036b..25cb4b3 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -14,21 +14,29 @@
  * limitations under the License.
  */
 
+#include "interpreter_goto_table_impl.h"
+
+// Common includes
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "stack.h"
+#include "thread.h"
+
+// Clang compiles the GOTO interpreter very slowly, so we skip it. The following includes are
+// only needed when the interpreter is actually compiled.
 #if !defined(__clang__)
-// Clang 3.4 fails to build the goto interpreter implementation.
-
-
-#include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
 #include "safe_math.h"
-
-#include <memory>  // std::unique_ptr
+#endif
 
 namespace art {
 namespace interpreter {
 
+#if !defined(__clang__)
+
 // In the following macros, we expect the following local variables exist:
 // - "self": the current Thread*.
 // - "inst" : the current Instruction*.
@@ -93,16 +101,6 @@
 #define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
 #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
 
-// Use with instructions labeled with kExperimental flag:
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
-  HANDLE_INSTRUCTION_START(opcode);                                                               \
-  DCHECK(inst->IsExperimental());                                                                 \
-  if (Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas)) {
-#define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
-  } else {                                                                                        \
-      UnexpectedOpcode(inst, shadow_frame);                                                       \
-  } HANDLE_INSTRUCTION_END();
-
 #define HANDLE_MONITOR_CHECKS()                                                                   \
   if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
@@ -163,14 +161,14 @@
   static const void* const handlersTable[instrumentation::kNumHandlerTables][kNumPackedOpcodes] = {
     {
     // Main handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
 #undef INSTRUCTION_HANDLER
     }, {
     // Alternative handler table.
-#define INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) &&alt_op_##code,
+#define INSTRUCTION_HANDLER(o, code, n, f, i, a, v) &&alt_op_##code,
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUCTION_HANDLER)
 #undef DEX_INSTRUCTION_LIST
@@ -190,8 +188,6 @@
   uint16_t inst_data;
   const void* const* currentHandlersTable;
   UPDATE_HANDLER_TABLE();
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
@@ -1668,14 +1664,6 @@
   }
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) {
-    bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                   &result_register);
-    UPDATE_HANDLER_TABLE();
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(NEG_INT)
     shadow_frame.SetVReg(
         inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
@@ -2457,62 +2445,6 @@
     ADVANCE(2);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
-    if (lambda_closure_builder == nullptr) {
-      // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    // TODO: these allocations should not leak, and the lambda method should not be local.
-    lambda::Closure* lambda_closure =
-        reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-    bool success = DoCreateLambda<do_access_check>(self,
-                                                   inst,
-                                                   /*inout*/shadow_frame,
-                                                   /*inout*/lambda_closure_builder.get(),
-                                                   /*inout*/lambda_closure);
-    lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(BOX_LAMBDA) {
-    bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(UNBOX_LAMBDA) {
-    bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) {
-    if (lambda_closure_builder == nullptr) {
-      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-    }
-
-    bool success = DoCaptureVariable<do_access_check>(self,
-                                                      inst,
-                                                      /*inout*/shadow_frame,
-                                                      /*inout*/lambda_closure_builder.get());
-
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
-  HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) {
-    bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-    // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-    lambda_captured_variable_index++;
-    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
-  }
-  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2545,10 +2477,34 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F3)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_F4)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_INSTRUCTION_START(UNUSED_F5)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F6)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F7)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F8)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
+  HANDLE_INSTRUCTION_START(UNUSED_F9)
+    UnexpectedOpcode(inst, shadow_frame);
+  HANDLE_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2597,7 +2553,7 @@
 // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
 // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
 // a constant condition that would remove the "if" statement so the test is free.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, i, a, v)                        \
   alt_op_##code: {                                                                            \
     if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
       Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
@@ -2613,20 +2569,40 @@
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteGotoImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
                                    ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
 
+#else
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
+  LOG(FATAL) << "UNREACHABLE";
+  UNREACHABLE();
+}
+// Explicit definitions of ExecuteGotoImpl.
+template<>
+JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
+                                    ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
+                                     ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<true, true>(Thread* self,  const DexFile::CodeItem* code_item,
+                                   ShadowFrame& shadow_frame, JValue result_register);
+template<>
+JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
+                                    ShadowFrame& shadow_frame, JValue result_register);
+#endif
+
 }  // namespace interpreter
 }  // namespace art
-
-#endif
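
The restructuring above keeps ExecuteGotoImpl declared unconditionally (see the new header below) while the computed-goto body is only built for non-clang compilers; under clang a fatal stub is emitted instead so the symbols still exist. A minimal C++ sketch of that general pattern, not the exact ART code, with placeholder names:

#include <cstdio>
#include <cstdlib>

// Run<kCheck, kTxn> stands in for ExecuteGotoImpl<do_access_check, transaction_active>.
template <bool kCheck, bool kTxn>
int Run(int input);

#if !defined(__clang__)
// Real body, only compiled where the compiler handles it acceptably.
template <bool kCheck, bool kTxn>
int Run(int input) {
  return input + (kCheck ? 1 : 0) + (kTxn ? 2 : 0);
}
#else
// Fallback body: the symbol exists, but reaching it at runtime is a bug.
template <bool kCheck, bool kTxn>
int Run(int /*input*/) {
  std::fprintf(stderr, "UNREACHABLE\n");
  std::abort();
}
#endif

// Explicit instantiations keep all variants defined in either configuration.
template int Run<true, false>(int);
template int Run<false, false>(int);
template int Run<true, true>(int);
template int Run<false, true>(int);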
diff --git a/runtime/interpreter/interpreter_goto_table_impl.h b/runtime/interpreter/interpreter_goto_table_impl.h
new file mode 100644
index 0000000..bb9be88
--- /dev/null
+++ b/runtime/interpreter/interpreter_goto_table_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteGotoImpl(Thread* self,
+                       const DexFile::CodeItem* code_item,
+                       ShadowFrame& shadow_frame,
+                       JValue result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_
diff --git a/runtime/interpreter/interpreter_mterp_impl.h b/runtime/interpreter/interpreter_mterp_impl.h
new file mode 100644
index 0000000..322df4e
--- /dev/null
+++ b/runtime/interpreter/interpreter_mterp_impl.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+// Mterp does not support transactions or access checks, so there are no templated versions.
+extern "C" bool ExecuteMterpImpl(Thread* self,
+                                 const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame,
+                                 JValue* result_register) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 18330ba..dd10052 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "base/stl_util.h"  // MakeUnique
+#include "interpreter_switch_impl.h"
+
+#include "base/enums.h"
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
 #include "safe_math.h"
 
-#include <memory>  // std::unique_ptr
-
 namespace art {
 namespace interpreter {
 
@@ -91,11 +91,6 @@
     }                                                                                          \
   } while (false)
 
-static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
-  DCHECK(inst->IsExperimental());
-  return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
-}
-
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register,
@@ -115,10 +110,6 @@
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
 
-  // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
-  // to keep this live for the scope of the entire function call.
-  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
-  size_t lambda_captured_variable_index = 0;
   do {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
@@ -283,7 +274,7 @@
         const size_t ref_idx = inst->VRegA_11x(inst_data);
         Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
-          size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+          PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
           Class* return_type = shadow_frame.GetMethod()->GetReturnType(true /* resolve */,
                                                                        pointer_size);
           // Re-load since it might have moved.
@@ -2332,105 +2323,13 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
-      case Instruction::INVOKE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
-                                                       &result_register);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CAPTURE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        if (lambda_closure_builder == nullptr) {
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        PREAMBLE();
-        bool success = DoCaptureVariable<do_access_check>(self,
-                                                          inst,
-                                                          /*inout*/shadow_frame,
-                                                          /*inout*/lambda_closure_builder.get());
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::CREATE_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-
-        if (lambda_closure_builder == nullptr) {
-          // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
-          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
-        }
-
-        // TODO: these allocations should not leak, and the lambda method should not be local.
-        lambda::Closure* lambda_closure =
-            reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
-        bool success = DoCreateLambda<do_access_check>(self,
-                                                       inst,
-                                                       /*inout*/shadow_frame,
-                                                       /*inout*/lambda_closure_builder.get(),
-                                                       /*inout*/lambda_closure);
-        lambda_closure_builder.reset(nullptr);  // reset state of variables captured
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::LIBERATE_VARIABLE: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoLiberateVariable<do_access_check>(self,
-                                                           inst,
-                                                           lambda_captured_variable_index,
-                                                           /*inout*/shadow_frame);
-        // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
-        lambda_captured_variable_index++;
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNUSED_F4: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        CHECK(false);  // TODO(iam): Implement opcodes for lambdas
-        break;
-      }
-      case Instruction::BOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
-      case Instruction::UNBOX_LAMBDA: {
-        if (!IsExperimentalInstructionEnabled(inst)) {
-          UnexpectedOpcode(inst, shadow_frame);
-        }
-
-        PREAMBLE();
-        bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
-        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
-        break;
-      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+      case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
+      case Instruction::INVOKE_POLYMORPHIC:
+      case Instruction::INVOKE_POLYMORPHIC_RANGE:
         UnexpectedOpcode(inst, shadow_frame);
     }
   } while (!interpret_one_instruction);
@@ -2440,19 +2339,19 @@
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteSwitchImpl.
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                       ShadowFrame& shadow_frame, JValue result_register,
                                       bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
+template HOT_ATTR
 JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                        ShadowFrame& shadow_frame, JValue result_register,
                                        bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register,
                                      bool interpret_one_instruction);
-template SHARED_REQUIRES(Locks::mutator_lock_)
+template
 JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                       ShadowFrame& shadow_frame, JValue result_register,
                                       bool interpret_one_instruction);
diff --git a/runtime/interpreter/interpreter_switch_impl.h b/runtime/interpreter/interpreter_switch_impl.h
new file mode 100644
index 0000000..90ec908
--- /dev/null
+++ b/runtime/interpreter/interpreter_switch_impl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "jvalue.h"
+
+namespace art {
+
+class ShadowFrame;
+class Thread;
+
+namespace interpreter {
+
+template<bool do_access_check, bool transaction_active>
+JValue ExecuteSwitchImpl(Thread* self,
+                         const DexFile::CodeItem* code_item,
+                         ShadowFrame& shadow_frame,
+                         JValue result_register,
+                         bool interpret_one_instruction) SHARED_REQUIRES(Locks::mutator_lock_);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_
diff --git a/runtime/interpreter/mterp/arm/alt_stub.S b/runtime/interpreter/mterp/arm/alt_stub.S
index 92ae0c6..9db5bf7 100644
--- a/runtime/interpreter/mterp/arm/alt_stub.S
+++ b/runtime/interpreter/mterp/arm/alt_stub.S
@@ -4,9 +4,9 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (${opnum} * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
index b8f0d92..7c9c631 100644
--- a/runtime/interpreter/mterp/arm/binopLit8.S
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+%default {"extract":"asr     r1, r3, #8", "result":"r0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = r0 op r1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -17,14 +21,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
     .if $chkzero
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    $preinstr                           @ optional op; may set condition codes
     $instr                              @ $result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG $result, r9                @ vAA<- $result
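
In the */lit8 encoding the second code unit is ssssCCBB, so the signed literal CC sits in the high byte; the new "extract" parameter lets an opcode either materialise it into r1 or fold the >>8 into the ALU instruction itself (e.g. add r0, r0, r3, asr #8, as the per-opcode files below do). The same decode in C++, with illustrative names:

#include <cstdint>

// unit is the second 16-bit code unit of a */lit8 instruction, laid out as CCBB.
inline void DecodeLit8(uint16_t unit, uint32_t* vreg_bb, int32_t* literal_cc) {
  *vreg_bb = unit & 0xff;                        // BB: source register index
  *literal_cc = static_cast<int8_t>(unit >> 8);  // CC: sign-extended 8-bit literal
}

// add-int/lit8 in one step, which is what "add r0, r0, r3, asr #8" folds together.
inline int32_t AddIntLit8(int32_t vbb, uint16_t unit) {
  return vbb + static_cast<int8_t>(unit >> 8);
}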
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
index b84684a..035510d 100644
--- a/runtime/interpreter/mterp/arm/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"add     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"add     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
index d5783e5..af746b5 100644
--- a/runtime/interpreter/mterp/arm/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"and     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"and     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
index 2d85038..9882bfc 100644
--- a/runtime/interpreter/mterp/arm/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"orr     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"orr     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
index 2ee11e1..dc953dc 100644
--- a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"rsb     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"rsb     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
index 6a48bfc..60a1498 100644
--- a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
index 60fe5fc..c2f6cb0 100644
--- a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
index 40a4435..5554eb0 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
+%include "arm/binopLit8.S" {"extract":"ubfx    r1, r3, #8, #5", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
index 46bb712..97d0b9e 100644
--- a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm/binopLit8.S" {"instr":"eor     r0, r0, r1"}
+%include "arm/binopLit8.S" {"extract":"", "instr":"eor     r0, r0, r3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/alt_stub.S b/runtime/interpreter/mterp/arm64/alt_stub.S
index 9b8b16d..3a463fe 100644
--- a/runtime/interpreter/mterp/arm64/alt_stub.S
+++ b/runtime/interpreter/mterp/arm64/alt_stub.S
@@ -4,9 +4,9 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
     adr    lr, artMterpAsmInstructionStart + (${opnum} * 128)       // Addr of primary handler.
     mov    x0, xSELF
     add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
index 326c657..dfa3169 100644
--- a/runtime/interpreter/mterp/arm64/binopLit8.S
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -1,10 +1,14 @@
-%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+%default {"extract": "asr     w1, w3, #8", "preinstr":"", "result":"w0", "chkzero":"0"}
     /*
      * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = w0 op w1".
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -13,11 +17,11 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    $extract                            // optional; typically w1<- ssssssCC (sign extended)
     .if $chkzero
     cbz     w1, common_errDivideByZero
     .endif
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
index 0d57cbf..04236ad 100644
--- a/runtime/interpreter/mterp/arm64/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -7,8 +7,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     $instr                              // s2<- op
diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
index a45e789..cad6318 100644
--- a/runtime/interpreter/mterp/arm64/fcmp.S
+++ b/runtime/interpreter/mterp/arm64/fcmp.S
@@ -1,4 +1,4 @@
-%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"}
+%default {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
     /*
      * Compare two floating-point values.  Puts 0, 1, or -1 into the
      * destination register based on the results of the comparison.
@@ -10,10 +10,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG$wide $r1, w2
     GET_VREG$wide $r2, w3
-    mov     w0, #$default_val
     fcmp $r1, $r2
-    csneg w0, w0, w0, $cond
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, $cond
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
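
The rewritten fcmp sequence (cset w0, ne; cneg w0, w0, <cond>) yields the required {-1, 0, +1} directly: equal gives 0, anything else starts as +1 and is negated when the chosen condition holds, and the condition (lt vs. cc, set per opcode below) decides whether an unordered NaN comparison lands on -1 (cmpl) or +1 (cmpg). A C++ sketch of the same mapping:

#include <cmath>
#include <cstdint>

// nan_result is -1 for cmpl-* and +1 for cmpg-*, matching the "lt" vs. "cc" choice.
inline int32_t FloatCompare(float a, float b, int32_t nan_result) {
  if (std::isnan(a) || std::isnan(b)) return nan_result;  // unordered
  if (a == b) return 0;                                   // cset w0, ne leaves 0
  return (a < b) ? -1 : 1;                                // cneg flips +1 when "less" holds
}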
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 2d3a11e..7628ed3 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -234,7 +234,7 @@
 #if MTERP_LOGGING
     mov  x0, xSELF
     add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
+    sxtw x2, wINST
     bl MterpLogOSR
 #endif
     mov  x0, #1                         // Signal normal return
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
index 9f5ad1e..aed830b 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrow.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -8,10 +8,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG $tgtreg, w4                // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
index 411396b..6fddfea 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG $tgtreg, w4                // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
index d83b39c..409e26b 100644
--- a/runtime/interpreter/mterp/arm64/funopWide.S
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
index 50a73f1..4c91ebc 100644
--- a/runtime/interpreter/mterp/arm64/funopWider.S
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 4257200..c791eb5 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -272,6 +272,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
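
GET_VREG_S is simply a sign-extending 32-bit load (ldrsw), so int-to-long can widen vB into a 64-bit register in a single instruction instead of loading and then sign-extending. Its C++ equivalent, with a hypothetical helper name:

#include <cstdint>

// vregs points at the 32-bit Dalvik register array in the shadow frame.
inline int64_t GetVRegSignExtended(const uint32_t* vregs, uint32_t vreg) {
  return static_cast<int32_t>(vregs[vreg]);  // widen with sign, as ldrsw does
}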
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
index 196ea99..2dfb8b9 100644
--- a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"add     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"add     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
index 167b40e..495b5cd 100644
--- a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"and     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"and     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
index 982e5b1..c4ad984 100644
--- a/runtime/interpreter/mterp/arm64/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S
@@ -5,8 +5,8 @@
     GET_VREG_WIDE x1, w2
     GET_VREG_WIDE x2, w3
     cmp     x1, x2
-    csinc   w0, wzr, wzr, eq
-    csneg   w0, w0, w0, ge
+    cset    w0, ne
+    cneg    w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG w0, w4
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
index 14f9ff8..30cb7eb 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
index 3a20cba..ba23f43 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
index 06d5917..c739685 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
index d87d086..32a9319 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
index 27f5273..f0e8192 100644
--- a/runtime/interpreter/mterp/arm64/op_const_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S
@@ -1,5 +1,5 @@
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
index 04cd4f8..9a36115 100644
--- a/runtime/interpreter/mterp/arm64/op_const_4.S
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S
@@ -1,8 +1,7 @@
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
index dd51ce1..3a9edff 100644
--- a/runtime/interpreter/mterp/arm64/op_const_high16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S
@@ -1,5 +1,5 @@
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
index e43628b..553d481 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -1,8 +1,7 @@
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    sbfm    x0, x0, 0, 31
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
index 527f7d8..9dc4fc3 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -1,10 +1,9 @@
     /* const-wide/32 vAA, #+BBBBbbbb */
-    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
     lsr     w3, wINST, #8               // w3<- AA
-    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
     FETCH_ADVANCE_INST 3                // advance rPC, load wINST
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
-    sbfm    x0, x0, 0, 31
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
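
const-wide/32 now fetches the high code unit sign-extended into a 64-bit register (FETCH_S x2) and ORs the zero-extended low half in, so no trailing sbfm is needed. A sketch of the value being assembled, with illustrative names:

#include <cstdint>

// lo and hi are the two 16-bit code units of const-wide/32 vAA, #+BBBBbbbb.
inline int64_t ConstWide32(uint16_t lo, uint16_t hi) {
  int64_t value = static_cast<int16_t>(hi);  // FETCH_S: sign-extend BBBB to 64 bits
  return (value << 16) | lo;                 // orr x0, x0, x2, lsl #16
}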
diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
index f50d9e4..86fa6db 100644
--- a/runtime/interpreter/mterp/arm64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
@@ -1,11 +1,11 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
     GET_VREG w0, w3                     // w0<- vAA (array object)
-    add     x1, xPC, w1, lsl #1         // w1<- PC + BBBBbbbb*2 (array data off.)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
     bl      MterpFillArrayData          // (obj, payload)
     cbz     w0, MterpPossibleException      // exception?
     FETCH_ADVANCE_INST 3                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
index 1d3202e1..47c1dee 100644
--- a/runtime/interpreter/mterp/arm64/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"eq" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbz     w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
index 8e3abd3..087e094 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ge" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbz     w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
index a4f2f6b..476b265 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"gt" }
+%include "arm64/zcmp.S" { "branch":"b.gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
index c1425fdd..2717a60 100644
--- a/runtime/interpreter/mterp/arm64/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"le" }
+%include "arm64/zcmp.S" { "branch":"b.le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
index 03cd3d6..86089c1 100644
--- a/runtime/interpreter/mterp/arm64/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"lt" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbnz    w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
index 21e1bc2..efacc88 100644
--- a/runtime/interpreter/mterp/arm64/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ne" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbnz    w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
index 45c68a3..699b2c4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -5,8 +5,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     $load   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     $extend
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 2480d2d..e9388e4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -3,9 +3,8 @@
     FETCH w4, 1                         // w4<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cbz     w3, common_errNullObject        // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    cbz     w3, common_errNullObject    // object was null
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
index 647bc75..a56705a 100644
--- a/runtime/interpreter/mterp/arm64/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -13,8 +13,7 @@
     mov       x3, xSELF                 // w3<- self
     bl        MterpInstanceOf           // (index, &obj, method, self)
     ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    lsr       w2, wINST, #8             // w2<- A+
-    and       w2, w2, #15               // w2<- A
+    ubfx      w2, wINST, #8, #4         // w2<- A
     PREFETCH_INST 2
     cbnz      x1, MterpException
     ADVANCE 2                           // advance rPC
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 13d2120..45e3112 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1,8 @@
-%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}
+    /* int-to-long vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
index 2afc51b..e95da76 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S
@@ -5,7 +5,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 27b5dc5..6cec363 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -3,11 +3,9 @@
     FETCH w3, 1                         // w3<- field byte offset
     GET_VREG w2, w2                     // w2<- fp[B], the object pointer
     ubfx    w0, wINST, #8, #4           // w0<- A
-    cmp     w2, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0-< fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
index 360a69b..73f58d8 100644
--- a/runtime/interpreter/mterp/arm64/op_long_to_int.S
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S
@@ -1 +1,2 @@
-%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"}
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm64/op_move.S"
diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
index e9064c4..d77859d 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S
@@ -1 +1 @@
-%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add     x0, x0, x1"}
+%include "arm64/unopWide.S" {"instr":"eor     x0, x0, #0x8000000000000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
index 49d51af..6652aec 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S
@@ -1 +1 @@
-%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add     w0, w0, w4"}
+%include "arm64/unop.S" {"instr":"eor     w0, w0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
index 51675f8..7cb26b7 100644
--- a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"orr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"orr     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index 1456f1a..408e030 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,12 +9,12 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
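// Aside on the sbfm -> sxtw change above (illustrative only, derived from this
// hunk): the two encodings are architectural aliases, so behavior is unchanged:
//
//     sbfm    xINST, x0, 0, 31        // old: sign-extend bits [31:0] of x0
//     sxtw    xINST, w0               // new: preferred alias for the same SBFM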
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
index 0b91891..95f81c5 100644
--- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -1,12 +1,10 @@
     /* rem vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     bl  fmodf
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG s0, w9
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
index bd0f237..3062a3f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
index b4671d2..9a7e09f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 4dd32e0..9c19b55 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
index c214a18..493b740 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
index 3c1484b..6efe8ee 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 26d5024..c7b61df 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
index bb8382b..005452b 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
index dbccb99..1cb8cb7 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index 35090c4..555ed4e 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"ubfx    w1, w3, #8, #5", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
index 6d187b5..1d3d93e 100644
--- a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"instr":"eor     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"extract":"", "instr":"eor     w0, w0, w3, asr #8"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
index 6306fca..dcb2fb7 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -12,8 +12,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    $opcode  x0, x1, x2                 // Do the shift.
+    $opcode  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
index 77d104a..b860dfd 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -8,8 +8,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    $opcode x0, x0, x1
+    $opcode x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
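// Aside on the mask removals in shiftWide.S and shiftWide2addr.S above
// (illustrative): the AArch64 register-shift forms (lslv/lsrv/asrv) already
// take the shift amount modulo the register size, so for 64-bit data:
//
//     lsl     x0, x1, x2              // shifts by x2 & 63, same result as the
//                                     // removed "and x2, x2, #63" sequence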
diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
index 474a961..e681968 100644
--- a/runtime/interpreter/mterp/arm64/unop.S
+++ b/runtime/interpreter/mterp/arm64/unop.S
@@ -1,4 +1,3 @@
-%default {"preinstr":""}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op w0".
@@ -11,7 +10,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    $preinstr                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     $instr                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
index 109302a..6ee4f92 100644
--- a/runtime/interpreter/mterp/arm64/unopWide.S
+++ b/runtime/interpreter/mterp/arm64/unopWide.S
@@ -1,4 +1,4 @@
-%default {"instr":"sub x0, xzr, x0", "preinstr":""}
+%default {"instr":"sub x0, xzr, x0"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op x0".
@@ -10,7 +10,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    $preinstr
     $instr
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index b303e6a..510a3c1 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,3 +1,4 @@
+%default { "compare":"1" }
     /*
      * Generic one-operand compare-and-branch operation.  Provide a "condition"
      * fragment that specifies the comparison to perform.
@@ -8,8 +9,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if ${compare}
     cmp     w2, #0                      // compare (vA, 0)
-    b.${condition} MterpCommonTakenBranchNoFlags
+    .endif
+    ${branch} MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
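// Aside on the parameterized zcmp.S above (illustrative expansions, derived
// from the op_if_*z includes earlier in this patch):
//
//     if-eqz ("compare":"0", "branch":"cbz     w2,"):
//         cbz     w2, MterpCommonTakenBranchNoFlags
//     if-gez ("compare":"0", "branch":"tbz     w2, #31,"):
//         tbz     w2, #31, MterpCommonTakenBranchNoFlags
//     if-gtz (default "compare":"1", "branch":"b.gt"):
//         cmp     w2, #0
//         b.gt    MterpCommonTakenBranchNoFlags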
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
index 436dcd2..b6caf11 100644
--- a/runtime/interpreter/mterp/config_arm
+++ b/runtime/interpreter/mterp/config_arm
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 57206d2..c5e06c7 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -20,9 +20,6 @@
 handler-style computed-goto
 handler-size 128
 
-# source for alternate entry stub
-asm-alt-stub arm64/alt_stub.S
-
 # file header and basic definitions
 import arm64/header.S
 
@@ -280,13 +277,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
@@ -295,5 +292,12 @@
     # op op_unused_ff FALLBACK
 op-end
 
-# common subroutines for asm
+# common subroutines for asm; we emit the footer before alternate
+# entry stubs, so that TBZ/TBNZ from ops can reach targets in footer
 import arm64/footer.S
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# emit alternate entry stubs
+alt-ops
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
index c6292c3..515cb0b 100644
--- a/runtime/interpreter/mterp/config_mips
+++ b/runtime/interpreter/mterp/config_mips
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
index c40c007..aafd248 100644
--- a/runtime/interpreter/mterp/config_mips64
+++ b/runtime/interpreter/mterp/config_mips64
@@ -279,13 +279,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index f1501e1..64d8ee8 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -283,13 +283,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
index 1d7eb03..7c357db 100644
--- a/runtime/interpreter/mterp/config_x86_64
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -283,13 +283,13 @@
     # op op_iget_byte_quick FALLBACK
     # op op_iget_char_quick FALLBACK
     # op op_iget_short_quick FALLBACK
-    op op_invoke_lambda FALLBACK
+    # op op_unused_f3 FALLBACK
     # op op_unused_f4 FALLBACK
-    op op_capture_variable FALLBACK
-    op op_create_lambda FALLBACK
-    op op_liberate_variable FALLBACK
-    op op_box_lambda FALLBACK
-    op op_unbox_lambda FALLBACK
+    # op op_unused_f5 FALLBACK
+    # op op_unused_f6 FALLBACK
+    # op op_unused_f7 FALLBACK
+    # op op_unused_f8 FALLBACK
+    # op op_unused_f9 FALLBACK
     # op op_unused_fa FALLBACK
     # op op_unused_fb FALLBACK
     # op op_unused_fc FALLBACK
diff --git a/runtime/interpreter/mterp/mips/alt_stub.S b/runtime/interpreter/mterp/mips/alt_stub.S
index 4598061..de13313 100644
--- a/runtime/interpreter/mterp/mips/alt_stub.S
+++ b/runtime/interpreter/mterp/mips/alt_stub.S
@@ -4,10 +4,10 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (${opnum} * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to MterpCheckBefore(self, shadow_frame, dex_pc_ptr)
diff --git a/runtime/interpreter/mterp/mips/bincmp.S b/runtime/interpreter/mterp/mips/bincmp.S
index 70057f6..68df5c3 100644
--- a/runtime/interpreter/mterp/mips/bincmp.S
+++ b/runtime/interpreter/mterp/mips/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -9,29 +8,11 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    b${revcmp} a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_${opcode}_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
-%break
-
-.L_${opcode}_finish:
+    b${condition} a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
index 5771a4f..c806a67 100644
--- a/runtime/interpreter/mterp/mips/entry.S
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -60,6 +60,12 @@
     /* Starting ibase */
     lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST()                           # load rINST from rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
index 083dc15..1363751 100644
--- a/runtime/interpreter/mterp/mips/footer.S
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -112,20 +112,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
  */
-MterpCheckSuspendAndContinue:
-    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)  # refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
     and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, 1f
+    bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC()
     move    a0, rSELF
     JAL(MterpSuspendCheck)              # (self)
     bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
 
@@ -172,6 +262,26 @@
     sw      v1, 4(a2)
     li      v0, 1                       # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
 /* Restore from the stack and return. Frame size = STACK_SIZE */
     STACK_LOAD_FULL()
     jalr    zero, ra
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index 37ab21d..a3a6744 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -51,7 +51,11 @@
    s2   rSELF     self (Thread) pointer
    s3   rIBASE    interpreted instruction base pointer, used for computed goto
    s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
    s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
 */
 
 /* single-purpose registers, given names for clarity */
@@ -63,6 +67,7 @@
 #define rOBJ s5
 #define rREFS s6
 #define rTEMP s7
+#define rPROFILE s8
 
 #define rARG0 a0
 #define rARG1 a1
@@ -160,7 +165,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -482,3 +487,6 @@
     STACK_LOAD(s8, 120); \
     STACK_LOAD(ra, 124); \
     DELETE_STACK(STACK_SIZE)
+
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
diff --git a/runtime/interpreter/mterp/mips/op_goto.S b/runtime/interpreter/mterp/mips/op_goto.S
index d6f21c9..57182a5 100644
--- a/runtime/interpreter/mterp/mips/op_goto.S
+++ b/runtime/interpreter/mterp/mips/op_goto.S
@@ -5,34 +5,6 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-#if MTERP_PROFILE_BRANCHES
     sll       a0, rINST, 16                #  a0 <- AAxx0000
     sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a2, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    sll       a0, rINST, 16                #  a0 <- AAxx0000
-    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_goto_16.S b/runtime/interpreter/mterp/mips/op_goto_16.S
index cec4432..06c96cd 100644
--- a/runtime/interpreter/mterp/mips/op_goto_16.S
+++ b/runtime/interpreter/mterp/mips/op_goto_16.S
@@ -5,30 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
index 083acd1..67f52e9 100644
--- a/runtime/interpreter/mterp/mips/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -8,36 +8,8 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
     sll       a1, a1, 16
     or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
-    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_if_eq.S b/runtime/interpreter/mterp/mips/op_if_eq.S
index e7190d8..d6f9987 100644
--- a/runtime/interpreter/mterp/mips/op_if_eq.S
+++ b/runtime/interpreter/mterp/mips/op_if_eq.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"ne" }
+%include "mips/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_eqz.S b/runtime/interpreter/mterp/mips/op_if_eqz.S
index 0a78fd9..c52b76a 100644
--- a/runtime/interpreter/mterp/mips/op_if_eqz.S
+++ b/runtime/interpreter/mterp/mips/op_if_eqz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"ne" }
+%include "mips/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ge.S b/runtime/interpreter/mterp/mips/op_if_ge.S
index b2629ba..bd06ff5 100644
--- a/runtime/interpreter/mterp/mips/op_if_ge.S
+++ b/runtime/interpreter/mterp/mips/op_if_ge.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"lt" }
+%include "mips/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gez.S b/runtime/interpreter/mterp/mips/op_if_gez.S
index b02f677..549231a 100644
--- a/runtime/interpreter/mterp/mips/op_if_gez.S
+++ b/runtime/interpreter/mterp/mips/op_if_gez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"lt" }
+%include "mips/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gt.S b/runtime/interpreter/mterp/mips/op_if_gt.S
index f620d4a..0be3091 100644
--- a/runtime/interpreter/mterp/mips/op_if_gt.S
+++ b/runtime/interpreter/mterp/mips/op_if_gt.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"le" }
+%include "mips/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gtz.S b/runtime/interpreter/mterp/mips/op_if_gtz.S
index 5e5dd70..5c7bcc4 100644
--- a/runtime/interpreter/mterp/mips/op_if_gtz.S
+++ b/runtime/interpreter/mterp/mips/op_if_gtz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"le" }
+%include "mips/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_le.S b/runtime/interpreter/mterp/mips/op_if_le.S
index a4e8b1a..c35c1a2 100644
--- a/runtime/interpreter/mterp/mips/op_if_le.S
+++ b/runtime/interpreter/mterp/mips/op_if_le.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"gt" }
+%include "mips/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lez.S b/runtime/interpreter/mterp/mips/op_if_lez.S
index af551a6..3dc6543 100644
--- a/runtime/interpreter/mterp/mips/op_if_lez.S
+++ b/runtime/interpreter/mterp/mips/op_if_lez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"gt" }
+%include "mips/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lt.S b/runtime/interpreter/mterp/mips/op_if_lt.S
index f33b9a4..3f3386c 100644
--- a/runtime/interpreter/mterp/mips/op_if_lt.S
+++ b/runtime/interpreter/mterp/mips/op_if_lt.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"ge" }
+%include "mips/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ltz.S b/runtime/interpreter/mterp/mips/op_if_ltz.S
index 18fcb1d..e6d6ed6 100644
--- a/runtime/interpreter/mterp/mips/op_if_ltz.S
+++ b/runtime/interpreter/mterp/mips/op_if_ltz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"ge" }
+%include "mips/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ne.S b/runtime/interpreter/mterp/mips/op_if_ne.S
index e0a102b..3d7bf35 100644
--- a/runtime/interpreter/mterp/mips/op_if_ne.S
+++ b/runtime/interpreter/mterp/mips/op_if_ne.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"eq" }
+%include "mips/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_if_nez.S b/runtime/interpreter/mterp/mips/op_if_nez.S
index d1866a0..d121eae 100644
--- a/runtime/interpreter/mterp/mips/op_if_nez.S
+++ b/runtime/interpreter/mterp/mips/op_if_nez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"eq" }
+%include "mips/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
index 93fae97..ffa4f47 100644
--- a/runtime/interpreter/mterp/mips/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -9,7 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -19,39 +18,4 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL($func)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .L${opcode}_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL($func)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
-%break
-
-.L${opcode}_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/zcmp.S b/runtime/interpreter/mterp/mips/zcmp.S
index 1fa1385..8d3a198 100644
--- a/runtime/interpreter/mterp/mips/zcmp.S
+++ b/runtime/interpreter/mterp/mips/zcmp.S
@@ -1,32 +1,16 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    b${revcmp} a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    b${condition} a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/alt_stub.S b/runtime/interpreter/mterp/mips64/alt_stub.S
index bd76a1b..12fa84d 100644
--- a/runtime/interpreter/mterp/mips64/alt_stub.S
+++ b/runtime/interpreter/mterp/mips64/alt_stub.S
@@ -4,11 +4,11 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (${opnum} * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
diff --git a/runtime/interpreter/mterp/mips64/bincmp.S b/runtime/interpreter/mterp/mips64/bincmp.S
index aa5e74b..07b1210 100644
--- a/runtime/interpreter/mterp/mips64/bincmp.S
+++ b/runtime/interpreter/mterp/mips64/bincmp.S
@@ -12,21 +12,9 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    b${condition}c a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    b${condition}c a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/entry.S b/runtime/interpreter/mterp/mips64/entry.S
index ae6c26b..cc48d45 100644
--- a/runtime/interpreter/mterp/mips64/entry.S
+++ b/runtime/interpreter/mterp/mips64/entry.S
@@ -57,6 +57,8 @@
     .cfi_rel_offset 20, STACK_OFFSET_S4
     sd      s5, STACK_OFFSET_S5(sp)
     .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
 
     /* Remember the return register */
     sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
@@ -77,6 +79,12 @@
     /* Starting ibase */
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GET_INST_OPCODE v0
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
index 14d5fe0..9994169 100644
--- a/runtime/interpreter/mterp/mips64/footer.S
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -71,23 +71,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in ra.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-    .extern MterpSuspendCheck
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     REFRESH_IBASE
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, check1
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
-check1:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
     EXPORT_PC
     move    a0, rSELF
-    jal     MterpSuspendCheck                       # (self)
-    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal MterpMaybeDoOnStackReplacement  # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is active
+ * on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
 
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
@@ -143,6 +230,28 @@
 check2:
     li      v0, 1                                   # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # nothing to report if <= 0; fall through if > 0
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
     ld      s5, STACK_OFFSET_S5(sp)
     .cfi_restore 21
     ld      s4, STACK_OFFSET_S4(sp)
@@ -169,4 +278,5 @@
     .cfi_adjust_cfa_offset -STACK_SIZE
 
     .cfi_endproc
+    .set    reorder
     .size ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/mips64/header.S b/runtime/interpreter/mterp/mips64/header.S
index dd0fbe0..b67df20 100644
--- a/runtime/interpreter/mterp/mips64/header.S
+++ b/runtime/interpreter/mterp/mips64/header.S
@@ -51,16 +51,18 @@
   s3  rINST     first 16-bit code unit of current instruction
   s4  rIBASE    interpreted instruction base pointer, used for computed goto
   s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
 */
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     s0
-#define rFP     s1
-#define rSELF   s2
-#define rINST   s3
-#define rIBASE  s4
-#define rREFS   s5
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
 
 /*
  * This is a #include, not a %include, because we want the C pre-processor
@@ -80,7 +82,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -121,6 +123,17 @@
 .endm
 
 /*
+ * Fetch the next instruction from an offset specified by \reg and advance rPC
+ * to point to the next instruction.  \reg must specify the distance in bytes,
+ * *not* 16-bit code units, and may be a signed value.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
  * Fetch the next instruction from the specified offset.  Advances rPC
  * to point to the next instruction.
  *
@@ -267,7 +280,8 @@
 #define STACK_OFFSET_S3 40
 #define STACK_OFFSET_S4 48
 #define STACK_OFFSET_S5 56
-#define STACK_SIZE      64
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
 
 /* Constants for float/double_to_int/long conversions */
 #define INT_MIN             0x80000000
diff --git a/runtime/interpreter/mterp/mips64/op_goto.S b/runtime/interpreter/mterp/mips64/op_goto.S
index 7c7d0ec..68fc83d 100644
--- a/runtime/interpreter/mterp/mips64/op_goto.S
+++ b/runtime/interpreter/mterp/mips64/op_goto.S
@@ -5,21 +5,6 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    .extern MterpProfileBranch
     srl     rINST, rINST, 8
     seb     rINST, rINST                # rINST <- offset (sign-extended AA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_16.S b/runtime/interpreter/mterp/mips64/op_goto_16.S
index 566e3a7..ae56066 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_16.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_16.S
@@ -5,20 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_32.S b/runtime/interpreter/mterp/mips64/op_goto_32.S
index b260083..498b6d6 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_32.S
@@ -8,22 +8,7 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
     ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_packed_switch.S b/runtime/interpreter/mterp/mips64/op_packed_switch.S
index 2c6eb2f..27ce580 100644
--- a/runtime/interpreter/mterp/mips64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips64/op_packed_switch.S
@@ -19,18 +19,4 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     $func                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f3.S b/runtime/interpreter/mterp/mips64/op_unused_f3.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f5.S b/runtime/interpreter/mterp/mips64/op_unused_f5.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f6.S b/runtime/interpreter/mterp/mips64/op_unused_f6.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f7.S b/runtime/interpreter/mterp/mips64/op_unused_f7.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f8.S b/runtime/interpreter/mterp/mips64/op_unused_f8.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_f9.S b/runtime/interpreter/mterp/mips64/op_unused_f9.S
new file mode 100644
index 0000000..29463d7
--- /dev/null
+++ b/runtime/interpreter/mterp/mips64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/zcmp.S b/runtime/interpreter/mterp/mips64/zcmp.S
index 0e0477f..75db49e 100644
--- a/runtime/interpreter/mterp/mips64/zcmp.S
+++ b/runtime/interpreter/mterp/mips64/zcmp.S
@@ -6,25 +6,12 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    b${condition}zc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    b${condition}zc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index bd1af04..c25cd78 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -46,7 +46,7 @@
 void InitMterpTls(Thread* self) {
   self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
   self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
-  self->SetMterpCurrentIBase(TraceExecutionEnabled() ?
+  self->SetMterpCurrentIBase((kTraceExecutionEnabled || kTestExportPC) ?
                              artMterpAsmAltInstructionStart :
                              artMterpAsmInstructionStart);
 }
@@ -57,7 +57,7 @@
  * Returns 3 if we don't find a match (it's the size of the sparse-switch
  * instruction).
  */
-extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
   const int kInstrLen = 3;
   uint16_t size;
   const int32_t* keys;
@@ -109,7 +109,7 @@
   return kInstrLen;
 }
 
-extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
   const int kInstrLen = 3;
 
   /*
@@ -142,7 +142,7 @@
   return entries[index];
 }
 
-extern "C" bool MterpShouldSwitchInterpreters()
+extern "C" size_t MterpShouldSwitchInterpreters()
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const instrumentation::Instrumentation* const instrumentation =
       Runtime::Current()->GetInstrumentation();
@@ -150,8 +150,10 @@
 }
 
 
-extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
-                                   uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtual(Thread* self,
+                                     ShadowFrame* shadow_frame,
+                                     uint16_t* dex_pc_ptr,
+                                     uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -159,8 +161,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame,
-                                 uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeSuper(Thread* self,
+                                   ShadowFrame* shadow_frame,
+                                   uint16_t* dex_pc_ptr,
+                                   uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -168,8 +172,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame,
-                                     uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeInterface(Thread* self,
+                                       ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -177,8 +183,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame,
-                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeDirect(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -186,8 +194,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame,
-                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeStatic(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -195,8 +205,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame,
-                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualRange(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -204,8 +216,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame,
-                                      uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeSuperRange(Thread* self,
+                                        ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,
+                                        uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -213,8 +227,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame,
-                                          uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeInterfaceRange(Thread* self,
+                                            ShadowFrame* shadow_frame,
+                                            uint16_t* dex_pc_ptr,
+                                            uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -222,8 +238,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame,
-                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeDirectRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -231,8 +249,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame,
-                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeStaticRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -240,8 +260,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame,
-                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuick(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -249,8 +271,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame,
-                                             uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self,
+                                               ShadowFrame* shadow_frame,
+                                               uint16_t* dex_pc_ptr,
+                                               uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -262,8 +286,10 @@
   QuasiAtomic::ThreadFenceForConstructor();
 }
 
-extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
-                                 Thread* self)
+extern "C" size_t MterpConstString(uint32_t index,
+                                   uint32_t tgt_vreg,
+                                   ShadowFrame* shadow_frame,
+                                   Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   String* s = ResolveString(self, *shadow_frame,  index);
   if (UNLIKELY(s == nullptr)) {
@@ -273,8 +299,10 @@
   return false;
 }
 
-extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
-                                Thread* self)
+extern "C" size_t MterpConstClass(uint32_t index,
+                                  uint32_t tgt_vreg,
+                                  ShadowFrame* shadow_frame,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -284,8 +312,10 @@
   return false;
 }
 
-extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* vreg_addr,
-                               art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpCheckCast(uint32_t index,
+                                 StackReference<mirror::Object>* vreg_addr,
+                                 art::ArtMethod* method,
+                                 Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -300,8 +330,10 @@
   return false;
 }
 
-extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* vreg_addr,
-                                art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpInstanceOf(uint32_t index,
+                                  StackReference<mirror::Object>* vreg_addr,
+                                  art::ArtMethod* method,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -312,12 +344,12 @@
   return (obj != nullptr) && obj->InstanceOf(c);
 }
 
-extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   return FillArrayData(obj, payload);
 }
 
-extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   Object* obj = nullptr;
@@ -342,7 +374,7 @@
   return true;
 }
 
-extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+extern "C" size_t MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
                                 uint32_t inst_data, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -350,23 +382,27 @@
       (self, *shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                uint32_t inst_data, Thread* self)
+extern "C" size_t MterpIputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
       (self, *shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                     uint32_t inst_data)
+extern "C" size_t MterpIputObjectQuick(ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                uint32_t inst_data)
+extern "C" size_t MterpAputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
@@ -383,24 +419,27 @@
   return false;
 }
 
-extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                    Thread* self)
+extern "C" size_t MterpFilledNewArray(ShadowFrame* shadow_frame,
+                                      uint16_t* dex_pc_ptr,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
                                                shadow_frame->GetResultRegister());
 }
 
-extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                         Thread* self)
+extern "C" size_t MterpFilledNewArrayRange(ShadowFrame* shadow_frame,
+                                           uint16_t* dex_pc_ptr,
+                                           Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
                                               shadow_frame->GetResultRegister());
 }
 
-extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                              uint32_t inst_data, Thread* self)
+extern "C" size_t MterpNewArray(ShadowFrame* shadow_frame,
+                                uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
@@ -414,7 +453,7 @@
   return true;
 }
 
-extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+extern "C" size_t MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(self->IsExceptionPending());
   const instrumentation::Instrumentation* const instrumentation =
@@ -430,16 +469,23 @@
   return true;
 }
 
-extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
+extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
   uint16_t inst_data = inst->Fetch16(0);
   if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
     self->AssertPendingException();
   } else {
     self->AssertNoPendingException();
   }
-  TraceExecution(*shadow_frame, inst, shadow_frame->GetDexPC());
+  if (kTraceExecutionEnabled) {
+    uint32_t dex_pc = dex_pc_ptr - shadow_frame->GetCodeItem()->insns_;
+    TraceExecution(*shadow_frame, inst, dex_pc);
+  }
+  if (kTestExportPC) {
+    // Save invalid dex pc to force segfault if improperly used.
+    shadow_frame->SetDexPCPtr(reinterpret_cast<uint16_t*>(kExportPCPoison));
+  }
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
@@ -519,14 +565,16 @@
   }
 }
 
-extern "C" bool MterpSuspendCheck(Thread* self)
+extern "C" size_t MterpSuspendCheck(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
   return MterpShouldSwitchInterpreters();
 }
 
-extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
-                                               uint64_t* new_value, Thread* self)
+extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
+                                                   ArtMethod* referrer,
+                                                   uint64_t* new_value,
+                                                   Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -544,8 +592,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                        ArtMethod* referrer)
+extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
+                                            mirror::Object* obj,
+                                            uint8_t new_value,
+                                            ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
@@ -561,8 +611,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        ArtMethod* referrer)
+extern "C" ssize_t artSet16InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint16_t new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int16_t));
@@ -579,8 +631,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                         uint32_t new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet32InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint32_t new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int32_t));
@@ -591,8 +645,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                         uint64_t* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet64InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint64_t* new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
@@ -603,8 +659,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                          mirror::Object* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSetObjInstanceFromMterp(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              mirror::Object* new_value,
+                                              ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
@@ -644,7 +702,7 @@
  * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
  * and regenerated following batch updates.
  */
-extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+extern "C" ssize_t MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   uint16_t hotness_count = method->GetCounter();
   int32_t countdown_value = jit::kJitHotnessDisabled;
@@ -682,7 +740,7 @@
  * Report a batch of hotness events to the instrumentation and then return the new
  * countdown value to the next time we should report.
  */
-extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
+extern "C" ssize_t MterpAddHotnessBatch(ArtMethod* method,
                                         ShadowFrame* shadow_frame,
                                         Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -695,7 +753,7 @@
 }
 
 // TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
-extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
@@ -712,9 +770,9 @@
   }
 }
 
-extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
-                                               ShadowFrame* shadow_frame,
-                                               int32_t offset)
+extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self,
+                                                 ShadowFrame* shadow_frame,
+                                                 int32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 8d24641..45ab98b 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,7 +30,19 @@
 
 void InitMterpTls(Thread* self);
 void CheckMterpAsmConstants();
-extern "C" bool MterpShouldSwitchInterpreters();
+
+// The return type should be 'bool' but our assembly stubs expect 'bool'
+// to be zero-extended to the whole register, and that's broken on x86-64,
+// where a 'bool' is returned in 'al' and the rest of 'rax' is garbage.
+// TODO: Fix mterp and stubs and revert this workaround. http://b/30232671
+extern "C" size_t MterpShouldSwitchInterpreters();
+
+// Poison value for TestExportPC.  If we segfault with this value, it means that a mterp
+// handler for a recent opcode failed to export the Dalvik PC prior to a possible exit from
+// the mterp environment.
+constexpr uintptr_t kExportPCPoison = 0xdead00ff;
+// Set true to enable poison testing of ExportPC.  Uses Alt interpreter.
+constexpr bool kTestExportPC = false;
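For orientation, a small hypothetical C++ sketch of how the poison is meant to work: the dex PC pointer is overwritten after each handled opcode, so a handler that later leaves mterp without re-exporting the real PC dereferences the poison value and faults loudly instead of silently using a stale PC. The struct and function names are illustrative only.

#include <cstdint>

struct FrameSketch { uint16_t* dex_pc_ptr; };  // stand-in for ShadowFrame

// Hypothetical illustration of the kExportPCPoison idea above.
void PoisonDexPC(FrameSketch* frame, uintptr_t poison) {
  // Any later dereference of this pointer (i.e. a missing EXPORT_PC) crashes.
  frame->dex_pc_ptr = reinterpret_cast<uint16_t*>(poison);
}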
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index a38a87b..c33df6d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -6473,6 +6473,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6485,15 +6489,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    add     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6511,6 +6514,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6523,15 +6530,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    rsb     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6550,6 +6556,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6562,14 +6572,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    asr     r1, r3, #8                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
     mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6657,6 +6666,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6669,15 +6682,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    and     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6695,6 +6707,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6707,15 +6723,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    orr     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6733,6 +6748,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6745,15 +6764,14 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+                                @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-                               @ optional op; may set condition codes
-    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    eor     r0, r0, r3, asr #8                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -6771,6 +6789,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6783,14 +6805,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6809,6 +6830,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6821,14 +6846,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -6847,6 +6871,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than r0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (r1).  Useful for integer division and modulus.
      *
@@ -6859,14 +6887,13 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
-    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    ubfx    r1, r3, #8, #5                            @ optional; typically r1<- ssssssCC (sign extended)
     .if 0
     @cmp     r1, #0                     @ is second operand zero?
     beq     common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
 
-    and     r1, r1, #31                           @ optional op; may set condition codes
     mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     SET_VREG r0, r9                @ vAA<- r0
@@ -7201,9 +7228,13 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm/op_unused_f3.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -7219,37 +7250,57 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm/op_unused_f5.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm/op_unused_f6.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm/op_unused_f7.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm/op_unused_f8.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm/op_unused_f9.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -7441,12 +7492,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (0 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7458,12 +7509,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (1 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7475,12 +7526,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (2 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7492,12 +7543,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (3 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7509,12 +7560,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (4 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7526,12 +7577,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (5 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7543,12 +7594,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (6 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7560,12 +7611,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (7 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7577,12 +7628,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (8 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7594,12 +7645,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (9 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7611,12 +7662,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (10 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7628,12 +7679,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (11 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7645,12 +7696,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (12 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7662,12 +7713,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (13 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7679,12 +7730,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (14 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7696,12 +7747,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (15 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7713,12 +7764,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (16 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7730,12 +7781,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (17 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7747,12 +7798,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (18 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7764,12 +7815,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (19 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7781,12 +7832,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (20 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7798,12 +7849,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (21 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7815,12 +7866,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (22 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7832,12 +7883,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (23 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7849,12 +7900,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (24 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7866,12 +7917,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (25 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7883,12 +7934,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (26 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7900,12 +7951,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (27 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7917,12 +7968,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (28 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7934,12 +7985,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (29 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7951,12 +8002,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (30 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7968,12 +8019,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (31 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7985,12 +8036,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (32 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8002,12 +8053,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (33 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8019,12 +8070,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (34 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8036,12 +8087,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (35 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8053,12 +8104,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (36 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8070,12 +8121,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (37 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8087,12 +8138,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (38 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8104,12 +8155,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (39 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8121,12 +8172,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (40 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8138,12 +8189,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (41 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8155,12 +8206,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (42 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8172,12 +8223,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (43 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8189,12 +8240,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (44 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8206,12 +8257,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (45 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8223,12 +8274,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (46 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8240,12 +8291,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (47 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8257,12 +8308,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (48 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8274,12 +8325,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (49 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8291,12 +8342,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (50 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8308,12 +8359,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (51 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8325,12 +8376,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (52 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8342,12 +8393,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (53 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8359,12 +8410,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (54 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8376,12 +8427,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (55 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8393,12 +8444,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (56 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8410,12 +8461,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (57 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8427,12 +8478,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (58 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8444,12 +8495,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (59 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8461,12 +8512,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (60 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8478,12 +8529,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (61 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8495,12 +8546,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (62 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8512,12 +8563,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (63 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8529,12 +8580,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (64 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8546,12 +8597,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (65 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8563,12 +8614,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (66 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8580,12 +8631,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (67 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8597,12 +8648,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (68 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8614,12 +8665,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (69 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8631,12 +8682,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (70 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8648,12 +8699,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (71 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8665,12 +8716,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (72 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8682,12 +8733,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (73 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8699,12 +8750,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (74 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8716,12 +8767,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (75 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8733,12 +8784,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (76 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8750,12 +8801,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (77 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8767,12 +8818,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (78 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8784,12 +8835,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (79 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8801,12 +8852,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (80 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8818,12 +8869,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (81 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8835,12 +8886,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (82 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8852,12 +8903,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (83 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8869,12 +8920,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (84 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8886,12 +8937,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (85 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8903,12 +8954,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (86 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8920,12 +8971,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (87 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8937,12 +8988,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (88 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8954,12 +9005,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (89 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8971,12 +9022,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (90 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8988,12 +9039,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (91 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9005,12 +9056,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (92 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9022,12 +9073,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (93 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9039,12 +9090,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (94 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9056,12 +9107,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (95 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9073,12 +9124,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (96 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9090,12 +9141,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (97 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9107,12 +9158,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (98 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9124,12 +9175,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (99 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9141,12 +9192,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (100 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9158,12 +9209,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (101 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9175,12 +9226,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (102 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9192,12 +9243,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (103 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9209,12 +9260,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (104 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9226,12 +9277,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (105 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9243,12 +9294,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (106 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9260,12 +9311,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (107 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9277,12 +9328,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (108 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9294,12 +9345,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (109 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9311,12 +9362,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (110 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9328,12 +9379,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (111 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9345,12 +9396,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (112 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9362,12 +9413,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (113 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9379,12 +9430,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (114 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9396,12 +9447,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (115 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9413,12 +9464,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (116 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9430,12 +9481,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (117 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9447,12 +9498,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (118 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9464,12 +9515,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (119 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9481,12 +9532,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (120 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9498,12 +9549,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (121 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9515,12 +9566,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (122 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9532,12 +9583,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (123 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9549,12 +9600,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (124 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9566,12 +9617,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (125 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9583,12 +9634,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (126 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9600,12 +9651,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (127 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9617,12 +9668,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (128 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9634,12 +9685,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (129 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9651,12 +9702,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (130 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9668,12 +9719,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (131 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9685,12 +9736,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (132 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9702,12 +9753,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (133 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9719,12 +9770,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (134 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9736,12 +9787,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (135 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9753,12 +9804,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (136 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9770,12 +9821,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (137 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9787,12 +9838,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (138 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9804,12 +9855,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (139 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9821,12 +9872,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (140 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9838,12 +9889,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (141 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9855,12 +9906,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (142 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9872,12 +9923,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (143 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9889,12 +9940,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (144 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9906,12 +9957,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (145 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9923,12 +9974,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (146 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9940,12 +9991,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (147 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9957,12 +10008,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (148 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9974,12 +10025,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (149 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9991,12 +10042,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (150 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10008,12 +10059,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (151 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10025,12 +10076,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (152 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10042,12 +10093,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (153 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10059,12 +10110,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (154 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10076,12 +10127,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (155 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10093,12 +10144,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (156 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10110,12 +10161,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (157 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10127,12 +10178,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (158 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10144,12 +10195,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (159 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10161,12 +10212,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (160 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10178,12 +10229,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (161 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10195,12 +10246,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (162 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10212,12 +10263,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (163 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10229,12 +10280,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (164 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10246,12 +10297,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (165 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10263,12 +10314,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (166 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10280,12 +10331,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (167 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10297,12 +10348,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (168 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10314,12 +10365,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (169 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10331,12 +10382,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (170 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10348,12 +10399,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (171 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10365,12 +10416,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (172 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10382,12 +10433,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (173 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10399,12 +10450,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (174 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10416,12 +10467,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (175 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10433,12 +10484,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (176 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10450,12 +10501,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (177 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10467,12 +10518,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (178 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10484,12 +10535,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (179 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10501,12 +10552,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (180 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10518,12 +10569,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (181 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10535,12 +10586,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (182 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10552,12 +10603,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (183 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10569,12 +10620,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (184 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10586,12 +10637,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (185 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10603,12 +10654,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (186 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10620,12 +10671,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (187 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10637,12 +10688,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (188 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10654,12 +10705,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (189 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10671,12 +10722,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (190 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10688,12 +10739,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (191 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10705,12 +10756,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (192 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10722,12 +10773,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (193 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10739,12 +10790,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (194 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10756,12 +10807,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (195 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10773,12 +10824,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (196 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10790,12 +10841,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (197 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10807,12 +10858,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (198 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10824,12 +10875,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (199 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10841,12 +10892,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (200 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10858,12 +10909,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (201 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10875,12 +10926,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (202 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10892,12 +10943,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (203 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10909,12 +10960,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (204 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10926,12 +10977,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (205 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10943,12 +10994,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (206 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10960,12 +11011,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (207 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10977,12 +11028,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (208 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10994,12 +11045,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (209 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11011,12 +11062,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (210 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11028,12 +11079,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (211 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11045,12 +11096,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (212 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11062,12 +11113,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (213 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11079,12 +11130,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (214 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11096,12 +11147,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (215 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11113,12 +11164,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (216 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11130,12 +11181,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (217 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11147,12 +11198,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (218 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11164,12 +11215,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (219 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11181,12 +11232,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (220 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11198,12 +11249,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (221 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11215,12 +11266,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (222 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11232,12 +11283,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (223 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11249,12 +11300,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (224 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11266,12 +11317,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (225 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11283,12 +11334,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (226 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11300,12 +11351,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (227 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11317,12 +11368,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (228 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11334,12 +11385,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (229 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11351,12 +11402,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (230 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11368,12 +11419,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (231 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11385,12 +11436,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (232 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11402,12 +11453,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (233 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11419,12 +11470,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (234 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11436,12 +11487,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (235 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11453,12 +11504,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (236 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11470,12 +11521,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (237 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11487,12 +11538,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (238 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11504,12 +11555,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (239 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11521,12 +11572,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (240 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11538,12 +11589,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (241 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11555,16 +11606,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (242 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11572,12 +11623,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (243 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11589,16 +11640,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (244 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11606,16 +11657,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (245 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11623,16 +11674,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (246 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11640,16 +11691,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (247 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11657,16 +11708,16 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (248 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11674,12 +11725,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (249 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11691,12 +11742,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (250 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11708,12 +11759,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (251 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11725,12 +11776,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (252 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11742,12 +11793,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (253 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11759,12 +11810,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (254 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11776,12 +11827,12 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
     adrl   lr, artMterpAsmInstructionStart + (255 * 128)       @ Addr of primary handler.
     mov    r0, rSELF
     add    r1, rFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+    mov    r2, rPC
+    b      MterpCheckBefore     @ (self, shadow_frame, dex_pc_ptr)  @ Tail call.
 
     .balign 128
     .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
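The ALT stub hunks above are all the same two-line change: EXPORT_PC is dropped and rPC is moved into r2, so MterpCheckBefore receives the dex PC pointer as a third argument and can presumably record it itself. A minimal sketch of the C++ side this implies (the parameter types are an assumption based on the registers the stubs load, not a quote of mterp.cc):

    #include <cstdint>

    namespace art { class Thread; class ShadowFrame; }

    // Assumed shape of the updated entry point: the stub now hands over the
    // current dex PC in r2 instead of exporting it to the shadow frame first.
    extern "C" void MterpCheckBefore(art::Thread* self,
                                     art::ShadowFrame* shadow_frame,
                                     uint16_t* dex_pc_ptr);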
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 55797e6..c7303b9 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -279,6 +279,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
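The new GET_VREG_S macro folds the register read and the 32-to-64-bit sign extension into a single ldrsw, which is what lets int-to-long drop an instruction later in this file. In C++ terms the effect is roughly the following (function and parameter names are illustrative, not ART symbols):

    #include <cstdint>

    // What GET_VREG_S computes: read a Dalvik vreg as a signed 32-bit value
    // and widen it to 64 bits in one step, as ldrsw does.
    inline int64_t GetVRegSignExtended(const uint32_t* vregs, uint32_t vreg) {
      return static_cast<int64_t>(static_cast<int32_t>(vregs[vreg]));
    }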
@@ -695,10 +703,9 @@
 .L_op_const_4: /* 0x12 */
 /* File: arm64/op_const_4.S */
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
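The const/4 rewrite replaces the lsl #16 / asr #28 pair with one sbfx, a signed bitfield extract of the 4-bit literal B at bits 12..15 of the instruction word. Both forms yield the same sign-extended value; a sketch of the equivalence (assuming inst holds the 16-bit code unit, names illustrative):

    #include <cstdint>

    // Old sequence: shift B into the top nibble, then arithmetic-shift it back down.
    inline int32_t ConstFourOld(uint32_t inst) {
      return static_cast<int32_t>(inst << 16) >> 28;            // sssssssB
    }

    // New sequence: extract bits [15:12] and sign-extend from 4 bits, as sbfx does.
    inline int32_t ConstFourNew(uint32_t inst) {
      uint32_t b = (inst >> 12) & 0xFu;
      return (b & 0x8u) ? static_cast<int32_t>(b | ~0xFu) : static_cast<int32_t>(b);
    }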
@@ -708,7 +715,7 @@
 .L_op_const_16: /* 0x13 */
 /* File: arm64/op_const_16.S */
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
@@ -734,7 +741,7 @@
 .L_op_const_high16: /* 0x15 */
 /* File: arm64/op_const_high16.S */
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -747,10 +754,9 @@
 .L_op_const_wide_16: /* 0x16 */
 /* File: arm64/op_const_wide_16.S */
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    sbfm    x0, x0, 0, 31
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
@@ -760,13 +766,12 @@
 .L_op_const_wide_32: /* 0x17 */
 /* File: arm64/op_const_wide_32.S */
     /* const-wide/32 vAA, #+BBBBbbbb */
-    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
     lsr     w3, wINST, #8               // w3<- AA
-    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
     FETCH_ADVANCE_INST 3                // advance rPC, load wINST
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
-    sbfm    x0, x0, 0, 31
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -934,8 +939,7 @@
     mov       x3, xSELF                 // w3<- self
     bl        MterpInstanceOf           // (index, &obj, method, self)
     ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    lsr       w2, wINST, #8             // w2<- A+
-    and       w2, w2, #15               // w2<- A
+    ubfx      w2, wINST, #8, #4         // w2<- A
     PREFETCH_INST 2
     cbnz      x1, MterpException
     ADVANCE 2                           // advance rPC
@@ -1053,12 +1057,12 @@
 /* File: arm64/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
     GET_VREG w0, w3                     // w0<- vAA (array object)
-    add     x1, xPC, w1, lsl #1         // w1<- PC + BBBBbbbb*2 (array data off.)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
     bl      MterpFillArrayData          // (obj, payload)
     cbz     w0, MterpPossibleException      // exception?
     FETCH_ADVANCE_INST 3                // advance rPC, load rINST
@@ -1143,14 +1147,14 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
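fill-array-data, packed-switch and sparse-switch get the same treatment: the 32-bit payload offset is built as a sign-extended 64-bit quantity before being scaled by two and added to the dex PC, and the helper's returned code-unit offset is widened with sxtw instead of sbfm. Roughly, with illustrative names (offsets are counted in 16-bit code units):

    #include <cstdint>

    // Payload/branch target: dex_pc + signed_offset, where the offset is the
    // sign-extended BBBBbbbb pair and pointer arithmetic supplies the *2 scaling.
    inline const uint16_t* SwitchTarget(const uint16_t* dex_pc,
                                        uint16_t low, uint16_t high) {
      int64_t hi = static_cast<int16_t>(high);
      int64_t offset = static_cast<int64_t>((static_cast<uint64_t>(hi) << 16) | low);
      return dex_pc + offset;   // each step is one 16-bit code unit (byte offset = offset * 2)
    }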
@@ -1168,14 +1172,14 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
 
 
@@ -1195,10 +1199,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG s1, w2
     GET_VREG s2, w3
-    mov     w0, #-1
     fcmp s1, s2
-    csneg w0, w0, w0, le
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1221,10 +1224,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG s1, w2
     GET_VREG s2, w3
-    mov     w0, #1
     fcmp s1, s2
-    csneg w0, w0, w0, pl
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, cc
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1247,10 +1249,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG_WIDE d1, w2
     GET_VREG_WIDE d2, w3
-    mov     w0, #-1
     fcmp d1, d2
-    csneg w0, w0, w0, le
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1273,10 +1274,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG_WIDE d1, w2
     GET_VREG_WIDE d2, w3
-    mov     w0, #1
     fcmp d1, d2
-    csneg w0, w0, w0, pl
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, cc
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1294,8 +1294,8 @@
     GET_VREG_WIDE x1, w2
     GET_VREG_WIDE x2, w3
     cmp     x1, x2
-    csinc   w0, wzr, wzr, eq
-    csneg   w0, w0, w0, ge
+    cset    w0, ne
+    cneg    w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG w0, w4
     GET_INST_OPCODE ip                  // extract opcode from wINST
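All of the cmp* opcodes now produce their -1/0/+1 result with a cset/cneg pair instead of mov plus csneg/csel: cset gives 0 on equality and 1 otherwise, then cneg negates on the "less than" condition (lt for cmp-long and the cmpl variants, cc for the cmpg variants so that NaN yields +1). Equivalent C++ for cmp-long (illustrative name):

    #include <cstdint>

    // cmp-long via the cset/cneg idiom: 0 if equal, otherwise +1 or -1 by ordering.
    inline int32_t CmpLong(int64_t a, int64_t b) {
      int32_t r = (a != b) ? 1 : 0;   // cset w0, ne
      if (a < b) r = -r;              // cneg w0, w0, lt
      return r;
    }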
@@ -1472,8 +1472,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.eq MterpCommonTakenBranchNoFlags
+    .endif
+    cbz     w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1496,8 +1498,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ne MterpCommonTakenBranchNoFlags
+    .endif
+    cbnz    w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1520,8 +1524,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.lt MterpCommonTakenBranchNoFlags
+    .endif
+    tbnz    w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1544,8 +1550,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ge MterpCommonTakenBranchNoFlags
+    .endif
+    tbz     w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1568,7 +1576,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.gt MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
@@ -1592,7 +1602,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.le MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
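A note on the branch rewrites above (my summary): the .if 0/.endif guards compile out the old "cmp w2, #0" wherever the test fuses into one instruction. cbz/cbnz test for zero directly, and tbnz/tbz on bit 31 test the sign bit, which for a 32-bit two's-complement value is exactly "< 0" / ">= 0". A tiny C illustration of that equivalence:

    #include <assert.h>
    #include <stdint.h>

    // Bit 31 of a 32-bit two's-complement integer is its sign bit,
    // so "bit 31 set" is exactly "value < 0" -- what tbnz w2, #31 tests.
    static int is_negative(int32_t v) {
        return (int)(((uint32_t)v >> 31) & 1u);
    }

    int main(void) {
        assert(is_negative(-1) == 1);
        assert(is_negative(0)  == 0);
        assert(is_negative(42) == 0);
        return 0;
    }

if-gtz and if-lez keep the explicit cmp (the .if 1 blocks) because "greater than zero" and "less than or equal to zero" cannot be decided from a single bit.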
@@ -3199,7 +3211,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sub     w0, wzr, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3225,7 +3236,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     mvn     w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3250,7 +3260,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     sub x0, xzr, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3274,7 +3283,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     mvn     x0, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3299,9 +3307,8 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    mov w4, #0x80000000                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    add     w0, w0, w4                              // w0<- op, w0-w3 changed
+    eor     w0, w0, #0x80000000                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                     // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -3324,8 +3331,7 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    mov x1, #0x8000000000000000
-    add     x0, x0, x1
+    eor     x0, x0, #0x8000000000000000
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
     GOTO_OPCODE ip                      // jump to next instruction
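The neg-float and neg-double hunks above replace an add of 0x80000000 / 0x8000000000000000 with an eor of the same constant. Both flip the IEEE-754 sign bit (the addend has no lower bits that could carry into bit 31/63), but the xor form expresses the intent directly and, for the 64-bit case, encodes the constant as a logical immediate so the scratch mov disappears. A small C sketch of the bit-level operation, for illustration only:

    #include <stdint.h>
    #include <string.h>

    // Negating a float/double is just flipping the IEEE-754 sign bit.
    static float neg_float_bits(float f) {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        bits ^= UINT32_C(0x80000000);          // what "eor w0, w0, #0x80000000" does
        memcpy(&f, &bits, sizeof f);
        return f;
    }

    static double neg_double_bits(double d) {
        uint64_t bits;
        memcpy(&bits, &d, sizeof bits);
        bits ^= UINT64_C(0x8000000000000000);  // what "eor x0, x0, #0x8000000000000000" does
        memcpy(&d, &bits, sizeof d);
        return d;
    }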
@@ -3336,25 +3342,15 @@
     .balign 128
 .L_op_int_to_long: /* 0x81 */
 /* File: arm64/op_int_to_long.S */
-/* File: arm64/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "x0 = op w0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
+    /* int-to-long vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
-    GET_VREG w0, w3
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
-    sbfm x0, x0, 0, 31                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG_WIDE x0, w4           // vA<- d0
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
     GOTO_OPCODE ip                      // jump to next instruction
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_int_to_float: /* 0x82 */
@@ -3369,10 +3365,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG w0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf s0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -3392,10 +3387,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG w0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf d0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3406,23 +3400,21 @@
     .balign 128
 .L_op_long_to_int: /* 0x84 */
 /* File: arm64/op_long_to_int.S */
-/* File: arm64/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "w0 = op x0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
-    lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
-    GET_VREG_WIDE x0, w3
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
-                                  // d0<- op
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG w0, w4                // vA<- d0
-    GOTO_OPCODE ip                      // jump to next instruction
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
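Two related simplifications show up in the conversion handlers around here: the A field is now extracted with a single ubfx instead of the old lsr-then-and pair, int-to-long loads the source vreg sign-extended straight into an x register, and long-to-int becomes the plain 32-bit move above because the narrowing conversion just keeps the low word. In C terms, a minimal sketch (two's-complement truncation assumed):

    #include <stdint.h>

    static int64_t int_to_long(int32_t v) {
        return (int64_t)v;                  // sign-extend, as GET_VREG_S now does in one step
    }

    static int32_t long_to_int(int64_t v) {
        return (int32_t)(uint32_t)v;        // keep the low 32 bits; the high word is ignored
    }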
 
 
 /* ------------------------------ */
@@ -3438,10 +3430,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf s0, x0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -3461,10 +3452,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf d0, x0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3485,10 +3475,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs w0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG w0, w4                // vA<- d0
@@ -3508,10 +3497,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs x0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4           // vA<- d0
@@ -3531,10 +3519,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvt  d0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3554,10 +3541,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs w0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG w0, w4                // vA<- d0
@@ -3577,10 +3563,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs x0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4           // vA<- d0
@@ -3600,10 +3585,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvt s0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -3627,7 +3611,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxtb    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3653,7 +3636,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     uxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3679,7 +3661,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -4032,7 +4013,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4071,7 +4052,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4110,7 +4091,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4424,8 +4405,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    lsl  x0, x1, x2                 // Do the shift.
+    lsl  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -4450,8 +4430,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    asr  x0, x1, x2                 // Do the shift.
+    asr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -4476,8 +4455,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    lsr  x0, x1, x2                 // Do the shift.
+    lsr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
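Several hunks in this area, both the 32-bit "and w1, w1, #31" removals above and the wide "and x2, x2, #63" ones just shown, drop the explicit mask of the shift count. That looks safe because AArch64 register-controlled shifts already use only the low 5 bits of the count for 32-bit operations and the low 6 bits for 64-bit operations, which is exactly the Dalvik rule that shl/shr/ushr take the count modulo the operand width. A C sketch of the required semantics (assuming the standard dex shift-masking rules):

    #include <stdint.h>

    static int64_t shl_long(int64_t v, int32_t count) {
        return (int64_t)((uint64_t)v << (count & 63));   // dex masks the count to 6 bits
    }

    static int32_t shl_int(int32_t v, int32_t count) {
        return (int32_t)((uint32_t)v << (count & 31));   // dex masks the count to 5 bits
    }

So "lsl x0, x1, x2" with an unmasked x2 produces the same result the old mask-then-shift pair did, one instruction shorter.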
@@ -5089,7 +5067,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5125,7 +5103,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5161,7 +5139,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5463,8 +5441,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    lsl x0, x0, x1
+    lsl x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5485,8 +5462,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    asr x0, x0, x1
+    asr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5507,8 +5483,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    lsr x0, x0, x1
+    lsr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5529,8 +5504,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fadd   s2, s0, s1                              // s2<- op
@@ -5554,8 +5528,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fsub   s2, s0, s1                              // s2<- op
@@ -5579,8 +5552,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fmul   s2, s0, s1                              // s2<- op
@@ -5604,8 +5576,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fdiv   s2, s0, s1                              // s2<- op
@@ -5621,13 +5592,11 @@
 /* File: arm64/op_rem_float_2addr.S */
     /* rem vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     bl  fmodf
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG s0, w9
@@ -6075,6 +6044,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6083,17 +6056,17 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    add     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
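The add-int/lit8 hunk above (and the and/or/xor variants further down) drops the separate "asr w1, w3, #8" and instead uses the shifted-register form of the ALU instruction, so the sign-extended CC literal is extracted as part of the operation itself. Sketch of what the folded operand computes, with w3 holding the sign-extended ssssCCBB code unit (assumes arithmetic right shift of signed ints, as on the targets in question):

    #include <stdint.h>

    // add-int/lit8 vAA, vBB, #+CC with the extraction folded into the add:
    // "add w0, w0, w3, asr #8" computes vBB + (ssssCCBB >> 8),
    // i.e. vBB plus the sign-extended CC byte.
    static int32_t add_int_lit8(int32_t vBB, int32_t ssssCCBB) {
        return vBB + (ssssCCBB >> 8);
    }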
@@ -6111,6 +6084,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6119,11 +6096,11 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6148,6 +6125,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6156,11 +6137,11 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
@@ -6184,6 +6165,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6192,11 +6177,11 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6220,6 +6205,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6228,11 +6217,11 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    asr     w1, w3, #8                            // optional; typically w1<- ssssssCC (sign extended)
     .if 1
     cbz     w1, common_errDivideByZero
     .endif
@@ -6256,6 +6245,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6264,17 +6257,17 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    and     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6292,6 +6285,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6300,17 +6297,17 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    orr     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6328,6 +6325,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6336,17 +6337,17 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+                                // optional; typically w1<- ssssssCC (sign extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
                                // optional op; may set condition codes
-    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    eor     w0, w0, w3, asr #8                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -6364,6 +6365,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6372,16 +6377,16 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 31, the 32-bit shift count (zero-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
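For the shift /lit8 forms just above, the literal is instead pulled out with "ubfx w1, w3, #8, #5", which yields bits 8..12 of the code unit, i.e. the shift count already reduced modulo 32, so the old trailing "and w1, w1, #31" is no longer needed. A sketch of the extraction (illustrative only):

    #include <stdint.h>

    // shl-int/lit8: only the low 5 bits of CC matter as a 32-bit shift count.
    static int32_t shl_int_lit8(int32_t vBB, uint32_t ssssCCBB) {
        uint32_t count = (ssssCCBB >> 8) & 0x1fu;        // what ubfx w1, w3, #8, #5 extracts
        return (int32_t)((uint32_t)vBB << count);
    }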
@@ -6400,6 +6405,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6408,16 +6417,16 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 31, the 32-bit shift count (zero-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -6436,6 +6445,10 @@
      * This could be an ARM instruction or a function call.  (If the result
      * comes back in a register other than w0, you can override "result".)
      *
+     * You can override "extract" if the extraction of the literal value
+     * from w3 to w1 is not the default "asr w1, w3, #8". The extraction
+     * can be omitted completely if the shift is embedded in "instr".
+     *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (w1).  Useful for integer division and modulus.
      *
@@ -6444,16 +6457,16 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
-    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    ubfx    w1, w3, #8, #5                            // w1<- CC & 31, the 32-bit shift count (zero-extended)
     .if 0
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -6471,8 +6484,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldr   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6489,9 +6501,8 @@
     FETCH w4, 1                         // w4<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cbz     w3, common_errNullObject        // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    cbz     w3, common_errNullObject    // object was null
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
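The iget-wide-quick/iput-wide-quick changes here replace the cmp/beq null check with a single cbz on the 32-bit object register and fold the field offset into the load/store addressing mode instead of materializing a direct pointer first. Roughly, in C (a sketch of my reading of the quick path; the names here are made up for illustration):

    #include <stdint.h>
    #include <string.h>

    // Hypothetical sketch of an iget-wide-quick style access:
    // null-check the receiver, then load 8 bytes at a byte offset into it.
    static int load_wide_field(const void *obj, uint32_t byte_offset, int64_t *out) {
        if (obj == NULL) {
            return -1;                                   // would branch to common_errNullObject
        }
        memcpy(out, (const char *)obj + byte_offset, sizeof *out);   // ldr x0, [x3, x4]
        return 0;
    }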
@@ -6527,7 +6538,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6544,12 +6554,10 @@
     FETCH w3, 1                         // w3<- field byte offset
     GET_VREG w2, w2                     // w2<- fp[B], the object pointer
     ubfx    w0, wINST, #8, #4           // w0<- A
-    cmp     w2, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -6630,7 +6638,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6650,7 +6657,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6670,7 +6676,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6690,7 +6695,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6710,8 +6714,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrb   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6731,8 +6734,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrsb   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6752,8 +6754,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrh   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6773,8 +6774,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrsh   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6785,9 +6785,13 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: arm64/op_unused_f3.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -6803,37 +6807,57 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: arm64/op_unused_f5.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: arm64/op_unused_f6.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: arm64/op_unused_f7.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: arm64/op_unused_f8.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: arm64/op_unused_f9.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
 
 
 /* ------------------------------ */
@@ -6922,4368 +6946,6 @@
     .global artMterpAsmSisterEnd
 artMterpAsmSisterEnd:
 
-
-    .global artMterpAsmAltInstructionStart
-    .type   artMterpAsmAltInstructionStart, %function
-    .text
-
-artMterpAsmAltInstructionStart = .L_ALT_op_nop
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_nop: /* 0x00 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (0 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move: /* 0x01 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (1 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_from16: /* 0x02 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (2 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_16: /* 0x03 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (3 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_wide: /* 0x04 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (4 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_wide_from16: /* 0x05 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (5 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_wide_16: /* 0x06 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (6 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_object: /* 0x07 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (7 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_object_from16: /* 0x08 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (8 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_object_16: /* 0x09 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (9 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_result: /* 0x0a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (10 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_result_wide: /* 0x0b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (11 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_result_object: /* 0x0c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (12 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_move_exception: /* 0x0d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (13 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_return_void: /* 0x0e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (14 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_return: /* 0x0f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (15 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_return_wide: /* 0x10 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (16 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_return_object: /* 0x11 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (17 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_4: /* 0x12 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (18 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_16: /* 0x13 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (19 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const: /* 0x14 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (20 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_high16: /* 0x15 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (21 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_wide_16: /* 0x16 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (22 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_wide_32: /* 0x17 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (23 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_wide: /* 0x18 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (24 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_wide_high16: /* 0x19 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (25 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_string: /* 0x1a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (26 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_string_jumbo: /* 0x1b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (27 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_const_class: /* 0x1c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (28 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_monitor_enter: /* 0x1d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (29 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_monitor_exit: /* 0x1e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (30 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_check_cast: /* 0x1f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (31 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_instance_of: /* 0x20 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (32 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_array_length: /* 0x21 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (33 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_new_instance: /* 0x22 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (34 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_new_array: /* 0x23 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (35 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_filled_new_array: /* 0x24 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (36 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_filled_new_array_range: /* 0x25 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (37 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_fill_array_data: /* 0x26 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (38 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_throw: /* 0x27 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (39 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_goto: /* 0x28 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (40 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_goto_16: /* 0x29 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (41 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_goto_32: /* 0x2a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (42 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_packed_switch: /* 0x2b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (43 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sparse_switch: /* 0x2c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (44 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_cmpl_float: /* 0x2d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (45 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_cmpg_float: /* 0x2e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (46 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_cmpl_double: /* 0x2f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (47 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_cmpg_double: /* 0x30 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (48 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_cmp_long: /* 0x31 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (49 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_eq: /* 0x32 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (50 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_ne: /* 0x33 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (51 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_lt: /* 0x34 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (52 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_ge: /* 0x35 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (53 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_gt: /* 0x36 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (54 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_le: /* 0x37 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (55 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_eqz: /* 0x38 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (56 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_nez: /* 0x39 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (57 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_ltz: /* 0x3a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (58 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_gez: /* 0x3b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (59 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_gtz: /* 0x3c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (60 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_if_lez: /* 0x3d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (61 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_3e: /* 0x3e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (62 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_3f: /* 0x3f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (63 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_40: /* 0x40 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (64 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_41: /* 0x41 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (65 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_42: /* 0x42 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (66 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_43: /* 0x43 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (67 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget: /* 0x44 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (68 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_wide: /* 0x45 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (69 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_object: /* 0x46 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (70 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_boolean: /* 0x47 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (71 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_byte: /* 0x48 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (72 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_char: /* 0x49 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (73 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aget_short: /* 0x4a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (74 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput: /* 0x4b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (75 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_wide: /* 0x4c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (76 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_object: /* 0x4d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (77 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_boolean: /* 0x4e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (78 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_byte: /* 0x4f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (79 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_char: /* 0x50 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (80 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_aput_short: /* 0x51 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (81 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget: /* 0x52 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (82 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_wide: /* 0x53 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (83 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_object: /* 0x54 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (84 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_boolean: /* 0x55 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (85 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_byte: /* 0x56 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (86 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_char: /* 0x57 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (87 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_short: /* 0x58 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (88 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput: /* 0x59 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (89 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_wide: /* 0x5a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (90 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_object: /* 0x5b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (91 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_boolean: /* 0x5c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (92 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_byte: /* 0x5d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (93 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_char: /* 0x5e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (94 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_short: /* 0x5f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (95 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget: /* 0x60 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (96 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_wide: /* 0x61 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (97 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_object: /* 0x62 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (98 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_boolean: /* 0x63 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (99 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_byte: /* 0x64 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (100 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_char: /* 0x65 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (101 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sget_short: /* 0x66 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (102 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput: /* 0x67 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (103 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_wide: /* 0x68 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (104 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_object: /* 0x69 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (105 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_boolean: /* 0x6a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (106 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_byte: /* 0x6b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (107 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_char: /* 0x6c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (108 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sput_short: /* 0x6d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (109 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_virtual: /* 0x6e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (110 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_super: /* 0x6f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (111 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_direct: /* 0x70 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (112 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_static: /* 0x71 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (113 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_interface: /* 0x72 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (114 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_return_void_no_barrier: /* 0x73 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (115 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_virtual_range: /* 0x74 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (116 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_super_range: /* 0x75 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (117 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_direct_range: /* 0x76 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (118 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_static_range: /* 0x77 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (119 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_interface_range: /* 0x78 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (120 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_79: /* 0x79 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (121 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_7a: /* 0x7a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (122 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_neg_int: /* 0x7b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (123 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_not_int: /* 0x7c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (124 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_neg_long: /* 0x7d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (125 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_not_long: /* 0x7e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (126 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_neg_float: /* 0x7f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (127 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_neg_double: /* 0x80 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (128 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_long: /* 0x81 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (129 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_float: /* 0x82 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (130 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_double: /* 0x83 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (131 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_long_to_int: /* 0x84 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (132 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_long_to_float: /* 0x85 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (133 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_long_to_double: /* 0x86 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (134 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_float_to_int: /* 0x87 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (135 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_float_to_long: /* 0x88 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (136 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_float_to_double: /* 0x89 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (137 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_double_to_int: /* 0x8a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (138 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_double_to_long: /* 0x8b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (139 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_double_to_float: /* 0x8c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (140 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_byte: /* 0x8d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (141 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_char: /* 0x8e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (142 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_int_to_short: /* 0x8f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (143 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_int: /* 0x90 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (144 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_int: /* 0x91 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (145 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_int: /* 0x92 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (146 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_int: /* 0x93 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (147 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_int: /* 0x94 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (148 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_int: /* 0x95 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (149 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_int: /* 0x96 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (150 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_int: /* 0x97 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (151 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shl_int: /* 0x98 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (152 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shr_int: /* 0x99 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (153 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_ushr_int: /* 0x9a */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (154 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_long: /* 0x9b */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (155 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_long: /* 0x9c */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (156 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_long: /* 0x9d */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (157 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_long: /* 0x9e */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (158 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_long: /* 0x9f */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (159 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_long: /* 0xa0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (160 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_long: /* 0xa1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (161 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_long: /* 0xa2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (162 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shl_long: /* 0xa3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (163 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shr_long: /* 0xa4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (164 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_ushr_long: /* 0xa5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (165 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_float: /* 0xa6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (166 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_float: /* 0xa7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (167 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_float: /* 0xa8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (168 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_float: /* 0xa9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (169 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_float: /* 0xaa */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (170 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_double: /* 0xab */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (171 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_double: /* 0xac */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (172 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_double: /* 0xad */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (173 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_double: /* 0xae */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (174 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_double: /* 0xaf */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (175 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_int_2addr: /* 0xb0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (176 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_int_2addr: /* 0xb1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (177 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_int_2addr: /* 0xb2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (178 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_int_2addr: /* 0xb3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (179 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_int_2addr: /* 0xb4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (180 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_int_2addr: /* 0xb5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (181 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_int_2addr: /* 0xb6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (182 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_int_2addr: /* 0xb7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (183 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shl_int_2addr: /* 0xb8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (184 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shr_int_2addr: /* 0xb9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (185 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_ushr_int_2addr: /* 0xba */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (186 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_long_2addr: /* 0xbb */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (187 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_long_2addr: /* 0xbc */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (188 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_long_2addr: /* 0xbd */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (189 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_long_2addr: /* 0xbe */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (190 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_long_2addr: /* 0xbf */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (191 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_long_2addr: /* 0xc0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (192 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_long_2addr: /* 0xc1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (193 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_long_2addr: /* 0xc2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (194 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shl_long_2addr: /* 0xc3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (195 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shr_long_2addr: /* 0xc4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (196 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_ushr_long_2addr: /* 0xc5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (197 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_float_2addr: /* 0xc6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (198 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_float_2addr: /* 0xc7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (199 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_float_2addr: /* 0xc8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (200 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_float_2addr: /* 0xc9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (201 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_float_2addr: /* 0xca */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (202 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_double_2addr: /* 0xcb */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (203 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_sub_double_2addr: /* 0xcc */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (204 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_double_2addr: /* 0xcd */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (205 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_double_2addr: /* 0xce */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (206 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_double_2addr: /* 0xcf */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (207 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_int_lit16: /* 0xd0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (208 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rsub_int: /* 0xd1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (209 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_int_lit16: /* 0xd2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (210 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_int_lit16: /* 0xd3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (211 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_int_lit16: /* 0xd4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (212 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_int_lit16: /* 0xd5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (213 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_int_lit16: /* 0xd6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (214 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_int_lit16: /* 0xd7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (215 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_add_int_lit8: /* 0xd8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (216 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rsub_int_lit8: /* 0xd9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (217 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_mul_int_lit8: /* 0xda */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (218 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_div_int_lit8: /* 0xdb */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (219 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_rem_int_lit8: /* 0xdc */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (220 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_and_int_lit8: /* 0xdd */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (221 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_or_int_lit8: /* 0xde */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (222 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_xor_int_lit8: /* 0xdf */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (223 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shl_int_lit8: /* 0xe0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (224 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_shr_int_lit8: /* 0xe1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (225 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_ushr_int_lit8: /* 0xe2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (226 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_quick: /* 0xe3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (227 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_wide_quick: /* 0xe4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (228 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_object_quick: /* 0xe5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (229 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_quick: /* 0xe6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (230 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_wide_quick: /* 0xe7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (231 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_object_quick: /* 0xe8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (232 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (233 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (234 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_boolean_quick: /* 0xeb */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (235 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_byte_quick: /* 0xec */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (236 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_char_quick: /* 0xed */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (237 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iput_short_quick: /* 0xee */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (238 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_boolean_quick: /* 0xef */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (239 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_byte_quick: /* 0xf0 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (240 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_char_quick: /* 0xf1 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (241 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_iget_short_quick: /* 0xf2 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (242 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (243 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_f4: /* 0xf4 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (244 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (245 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (246 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (247 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (248 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (249 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (250 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (251 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (252 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (253 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_fe: /* 0xfe */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (254 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-/* ------------------------------ */
-    .balign 128
-.L_ALT_op_unused_ff: /* 0xff */
-/* File: arm64/alt_stub.S */
-/*
- * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
- * any interesting requests and then jump to the real instruction
- * handler.  Note that the call to MterpCheckBefore is done as a tail call.
- */
-    .extern MterpCheckBefore
-    EXPORT_PC
-    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
-    adr    lr, artMterpAsmInstructionStart + (255 * 128)       // Addr of primary handler.
-    mov    x0, xSELF
-    add    x1, xFP, #OFF_FP_SHADOWFRAME
-    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
-
-    .balign 128
-    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
-    .global artMterpAsmAltInstructionEnd
-artMterpAsmAltInstructionEnd:
 /* File: arm64/footer.S */
 /*
  * ===========================================================================
@@ -11521,7 +7183,7 @@
 #if MTERP_LOGGING
     mov  x0, xSELF
     add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
+    sxtw x2, wINST
     bl MterpLogOSR
 #endif
     mov  x0, #1                         // Signal normal return
@@ -11599,3 +7261,4365 @@
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
 
 
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (0 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (1 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (2 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (3 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (4 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (5 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (6 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (7 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (8 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (9 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (10 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (11 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (12 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (13 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (14 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (15 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (16 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (17 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (18 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (19 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (20 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (21 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (22 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (23 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (24 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (25 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (26 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (27 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (28 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (29 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (30 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (31 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (32 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (33 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (34 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (35 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (36 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (37 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (38 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (39 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (40 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (41 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (42 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (43 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (44 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (45 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (46 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (47 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (48 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (49 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (50 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (51 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (52 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (53 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (54 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (55 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (56 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (57 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (58 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (59 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (60 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (61 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (62 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (63 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (64 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (65 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (66 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (67 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (68 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (69 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (70 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (71 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (72 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (73 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (74 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (75 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (76 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (77 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (78 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (79 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (80 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (81 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (82 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (83 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (84 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (85 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (86 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (87 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (88 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (89 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (90 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (91 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (92 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (93 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (94 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (95 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (96 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (97 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (98 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (99 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (100 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (101 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (102 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (103 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (104 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (105 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (106 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (107 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (108 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (109 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (110 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (111 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (112 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (113 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (114 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (115 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (116 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (117 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (118 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (119 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (120 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (121 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (122 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (123 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (124 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (125 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (126 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (127 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (128 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (129 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (130 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (131 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (132 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (133 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (134 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (135 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (136 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (137 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (138 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (139 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (140 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (141 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (142 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (143 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (144 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (145 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (146 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (147 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (148 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (149 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (150 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (151 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (152 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (153 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (154 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (155 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (156 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (157 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (158 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (159 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (160 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (161 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (162 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (163 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (164 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (165 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (166 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (167 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (168 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (169 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (170 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (171 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (172 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (173 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (174 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (175 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (176 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (177 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (178 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (179 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (180 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (181 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (182 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (183 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (184 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (185 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (186 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (187 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (188 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (189 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (190 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (191 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (192 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (193 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (194 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (195 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (196 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (197 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (198 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (199 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (200 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (201 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (202 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (203 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (204 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (205 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (206 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (207 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (208 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (209 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (210 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (211 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (212 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (213 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (214 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (215 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (216 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (217 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (218 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (219 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (220 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (221 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (222 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (223 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (224 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (225 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (226 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (227 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (228 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (229 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (230 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (231 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (232 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (233 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (234 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (235 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (236 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (237 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (238 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (239 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (240 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (241 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (242 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f3: /* 0xf3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (243 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (244 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f5: /* 0xf5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (245 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f6: /* 0xf6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (246 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f7: /* 0xf7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (247 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f8: /* 0xf8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (248 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f9: /* 0xf9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (249 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (250 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (251 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (252 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (253 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (254 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (255 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    mov    x2, xPC
+    b      MterpCheckBefore     // (self, shadow_frame, dex_pc_ptr) Note: tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index b134129..fef7dc6 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -58,7 +58,11 @@
    s2   rSELF     self (Thread) pointer
    s3   rIBASE    interpreted instruction base pointer, used for computed goto
    s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
    s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
 */
 
 /* single-purpose registers, given names for clarity */
@@ -70,6 +74,7 @@
 #define rOBJ s5
 #define rREFS s6
 #define rTEMP s7
+#define rPROFILE s8
 
 #define rARG0 a0
 #define rARG1 a1
@@ -167,7 +172,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -490,6 +495,9 @@
     STACK_LOAD(ra, 124); \
     DELETE_STACK(STACK_SIZE)
 
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
 /* File: mips/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -553,6 +561,12 @@
     /* Starting ibase */
     lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST()                           # load rINST from rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -1284,37 +1298,9 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-#if MTERP_PROFILE_BRANCHES
     sll       a0, rINST, 16                #  a0 <- AAxx0000
     sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a2, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    sll       a0, rINST, 16                #  a0 <- AAxx0000
-    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1327,33 +1313,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1369,39 +1330,11 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
     sll       a1, a1, 16
     or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
-    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1417,7 +1350,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -1427,37 +1359,7 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .Lop_packed_switch_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
+    b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1474,7 +1376,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -1484,37 +1385,7 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .Lop_sparse_switch_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
+    b         MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1772,9 +1643,8 @@
 /* File: mips/op_if_eq.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1782,27 +1652,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bne a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_eq_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    beq a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1811,9 +1668,8 @@
 /* File: mips/op_if_ne.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1821,27 +1677,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    beq a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_ne_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bne a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1850,9 +1693,8 @@
 /* File: mips/op_if_lt.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1860,27 +1702,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bge a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_lt_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    blt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1889,9 +1718,8 @@
 /* File: mips/op_if_ge.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1899,27 +1727,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    blt a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_ge_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bge a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1928,9 +1743,8 @@
 /* File: mips/op_if_gt.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1938,27 +1752,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    ble a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_gt_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bgt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1967,9 +1768,8 @@
 /* File: mips/op_if_le.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1977,27 +1777,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bgt a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_le_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    ble a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2006,35 +1793,19 @@
 /* File: mips/op_if_eqz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bne a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    beq a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2045,35 +1816,19 @@
 /* File: mips/op_if_nez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    beq a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bne a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2084,35 +1839,19 @@
 /* File: mips/op_if_ltz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bge a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    blt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2123,35 +1862,19 @@
 /* File: mips/op_if_gez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    blt a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bge a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2162,35 +1885,19 @@
 /* File: mips/op_if_gtz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    ble a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bgt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2201,35 +1908,19 @@
 /* File: mips/op_if_lez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bgt a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    ble a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -7856,9 +7547,14 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips/op_unused_f3.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
@@ -7873,33 +7569,58 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips/op_unused_f5.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips/op_unused_f6.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips/op_unused_f7.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips/op_unused_f8.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
-    b    MterpFallback
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips/op_unused_f9.S */
+/* File: mips/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
 
 /* ------------------------------ */
     .balign 128
@@ -7983,18 +7704,6 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_packed_switch */
-
-.Lop_packed_switch_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_sparse_switch */
-
-.Lop_sparse_switch_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_cmpl_float */
 
 .Lop_cmpl_float_nan:
@@ -8039,42 +7748,6 @@
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
-/* continuation for op_if_eq */
-
-.L_op_if_eq_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_ne */
-
-.L_op_if_ne_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_lt */
-
-.L_op_if_lt_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_ge */
-
-.L_op_if_ge_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_gt */
-
-.L_op_if_gt_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_le */
-
-.L_op_if_le_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_float_to_int */
 
 /*
@@ -8372,13 +8045,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (0 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
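The alt stubs from here on change in two ways. First, the current dex PC is now passed explicitly (move a2, rPC), matching the widened signature in the updated comment, Mterp(self, shadow_frame, dex_pc_ptr), which is presumably why the EXPORT_PC() call ahead of the tail call is dropped. Second, the jump register switches from a2 to t9: under the MIPS o32 PIC calling convention the callee expects its own address in $t9 so its prologue can recompute $gp, so an indirect call into C code such as MterpCheckBefore should go through $t9. A minimal sketch of the resulting pattern, with SomeCheckHandler as a stand-in name rather than anything defined by this patch:

    # Sketch only: tail-call a three-argument C helper from a MIPS o32 PIC stub.
    la     ra, artMterpAsmInstructionStart + (0 * 128)   # where the helper "returns" to (primary handler)
    move   a0, rSELF                     # arg0: self
    addu   a1, rFP, OFF_FP_SHADOWFRAME   # arg1: shadow_frame
    move   a2, rPC                       # arg2: dex_pc_ptr
    la     t9, SomeCheckHandler          # PIC ABI: callee address goes in $t9
    jalr   zero, t9                      # jump, discarding the link; ra set above is preserved

The same substitution repeats mechanically for every remaining alt stub in this file.
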
@@ -8390,13 +8063,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (1 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8408,13 +8081,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (2 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8426,13 +8099,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (3 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8444,13 +8117,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (4 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8462,13 +8135,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (5 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8480,13 +8153,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (6 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8498,13 +8171,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (7 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8516,13 +8189,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (8 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8534,13 +8207,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (9 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8552,13 +8225,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (10 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8570,13 +8243,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (11 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8588,13 +8261,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (12 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8606,13 +8279,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (13 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8624,13 +8297,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (14 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8642,13 +8315,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (15 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8660,13 +8333,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (16 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8678,13 +8351,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (17 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8696,13 +8369,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (18 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8714,13 +8387,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (19 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8732,13 +8405,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (20 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8750,13 +8423,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (21 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8768,13 +8441,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (22 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8786,13 +8459,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (23 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8804,13 +8477,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (24 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8822,13 +8495,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (25 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8840,13 +8513,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (26 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8858,13 +8531,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (27 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8876,13 +8549,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (28 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8894,13 +8567,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (29 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8912,13 +8585,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (30 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8930,13 +8603,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (31 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8948,13 +8621,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (32 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8966,13 +8639,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (33 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -8984,13 +8657,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (34 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9002,13 +8675,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (35 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9020,13 +8693,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (36 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9038,13 +8711,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (37 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9056,13 +8729,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (38 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9074,13 +8747,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (39 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9092,13 +8765,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (40 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9110,13 +8783,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (41 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9128,13 +8801,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (42 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9146,13 +8819,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (43 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9164,13 +8837,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (44 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9182,13 +8855,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (45 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9200,13 +8873,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (46 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9218,13 +8891,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (47 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9236,13 +8909,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (48 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9254,13 +8927,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (49 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9272,13 +8945,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (50 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9290,13 +8963,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (51 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9308,13 +8981,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (52 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9326,13 +8999,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (53 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9344,13 +9017,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (54 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9362,13 +9035,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (55 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9380,13 +9053,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (56 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9398,13 +9071,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (57 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9416,13 +9089,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (58 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9434,13 +9107,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (59 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9452,13 +9125,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (60 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9470,13 +9143,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (61 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9488,13 +9161,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (62 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9506,13 +9179,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (63 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9524,13 +9197,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (64 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9542,13 +9215,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (65 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9560,13 +9233,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (66 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9578,13 +9251,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (67 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9596,13 +9269,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (68 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9614,13 +9287,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (69 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9632,13 +9305,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (70 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9650,13 +9323,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (71 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9668,13 +9341,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (72 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9686,13 +9359,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (73 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9704,13 +9377,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (74 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9722,13 +9395,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (75 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9740,13 +9413,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (76 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9758,13 +9431,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (77 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9776,13 +9449,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (78 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9794,13 +9467,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (79 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9812,13 +9485,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (80 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9830,13 +9503,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (81 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9848,13 +9521,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (82 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9866,13 +9539,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (83 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9884,13 +9557,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (84 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9902,13 +9575,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (85 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9920,13 +9593,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (86 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9938,13 +9611,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (87 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9956,13 +9629,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (88 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9974,13 +9647,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (89 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -9992,13 +9665,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (90 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10010,13 +9683,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (91 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10028,13 +9701,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (92 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10046,13 +9719,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (93 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10064,13 +9737,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (94 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10082,13 +9755,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (95 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10100,13 +9773,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (96 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10118,13 +9791,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (97 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10136,13 +9809,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (98 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10154,13 +9827,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (99 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10172,13 +9845,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (100 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10190,13 +9863,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (101 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10208,13 +9881,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (102 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10226,13 +9899,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (103 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10244,13 +9917,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (104 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10262,13 +9935,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (105 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10280,13 +9953,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (106 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10298,13 +9971,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (107 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10316,13 +9989,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (108 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10334,13 +10007,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (109 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10352,13 +10025,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (110 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10370,13 +10043,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (111 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10388,13 +10061,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (112 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10406,13 +10079,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (113 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10424,13 +10097,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (114 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10442,13 +10115,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (115 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10460,13 +10133,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (116 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10478,13 +10151,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (117 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10496,13 +10169,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (118 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10514,13 +10187,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (119 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10532,13 +10205,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (120 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10550,13 +10223,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (121 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10568,13 +10241,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (122 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10586,13 +10259,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (123 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10604,13 +10277,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (124 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10622,13 +10295,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (125 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10640,13 +10313,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (126 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10658,13 +10331,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (127 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10676,13 +10349,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (128 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10694,13 +10367,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (129 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10712,13 +10385,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (130 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10730,13 +10403,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (131 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10748,13 +10421,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (132 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10766,13 +10439,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (133 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10784,13 +10457,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (134 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10802,13 +10475,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (135 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10820,13 +10493,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (136 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10838,13 +10511,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (137 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10856,13 +10529,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (138 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10874,13 +10547,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (139 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10892,13 +10565,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (140 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10910,13 +10583,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (141 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10928,13 +10601,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (142 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10946,13 +10619,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (143 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10964,13 +10637,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (144 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -10982,13 +10655,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (145 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11000,13 +10673,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (146 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11018,13 +10691,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (147 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11036,13 +10709,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (148 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11054,13 +10727,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (149 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11072,13 +10745,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (150 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11090,13 +10763,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (151 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11108,13 +10781,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (152 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11126,13 +10799,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (153 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11144,13 +10817,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (154 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11162,13 +10835,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (155 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11180,13 +10853,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (156 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11198,13 +10871,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (157 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11216,13 +10889,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (158 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11234,13 +10907,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (159 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11252,13 +10925,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (160 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11270,13 +10943,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (161 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11288,13 +10961,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (162 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11306,13 +10979,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (163 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11324,13 +10997,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (164 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11342,13 +11015,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (165 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11360,13 +11033,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (166 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11378,13 +11051,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (167 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11396,13 +11069,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (168 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11414,13 +11087,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (169 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11432,13 +11105,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (170 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11450,13 +11123,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (171 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11468,13 +11141,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (172 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11486,13 +11159,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (173 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11504,13 +11177,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (174 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11522,13 +11195,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (175 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11540,13 +11213,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (176 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11558,13 +11231,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (177 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11576,13 +11249,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (178 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11594,13 +11267,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (179 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11612,13 +11285,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (180 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11630,13 +11303,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (181 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11648,13 +11321,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (182 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11666,13 +11339,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (183 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11684,13 +11357,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (184 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11702,13 +11375,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (185 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11720,13 +11393,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (186 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11738,13 +11411,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (187 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11756,13 +11429,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (188 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11774,13 +11447,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (189 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11792,13 +11465,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (190 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11810,13 +11483,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (191 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11828,13 +11501,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (192 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11846,13 +11519,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (193 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11864,13 +11537,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (194 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11882,13 +11555,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (195 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11900,13 +11573,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (196 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11918,13 +11591,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (197 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11936,13 +11609,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (198 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11954,13 +11627,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (199 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11972,13 +11645,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (200 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -11990,13 +11663,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (201 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12008,13 +11681,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (202 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12026,13 +11699,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (203 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12044,13 +11717,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (204 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12062,13 +11735,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (205 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12080,13 +11753,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (206 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12098,13 +11771,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (207 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12116,13 +11789,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (208 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12134,13 +11807,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (209 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12152,13 +11825,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (210 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12170,13 +11843,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (211 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12188,13 +11861,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (212 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12206,13 +11879,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (213 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12224,13 +11897,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (214 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12242,13 +11915,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (215 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12260,13 +11933,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (216 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12278,13 +11951,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (217 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12296,13 +11969,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (218 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12314,13 +11987,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (219 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12332,13 +12005,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (220 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12350,13 +12023,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (221 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12368,13 +12041,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (222 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12386,13 +12059,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (223 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12404,13 +12077,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (224 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12422,13 +12095,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (225 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12440,13 +12113,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (226 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12458,13 +12131,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (227 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12476,13 +12149,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (228 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12494,13 +12167,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (229 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12512,13 +12185,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (230 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12530,13 +12203,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (231 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12548,13 +12221,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (232 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12566,13 +12239,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (233 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12584,13 +12257,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (234 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12602,13 +12275,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (235 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12620,13 +12293,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (236 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12638,13 +12311,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (237 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12656,13 +12329,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (238 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12674,13 +12347,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (239 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12692,13 +12365,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (240 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12710,13 +12383,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (241 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12728,17 +12401,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (242 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12746,13 +12419,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (243 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12764,17 +12437,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (244 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12782,17 +12455,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (245 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12800,17 +12473,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (246 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12818,17 +12491,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (247 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12836,17 +12509,17 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (248 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12854,13 +12527,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (249 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12872,13 +12545,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (250 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12890,13 +12563,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (251 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12908,13 +12581,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (252 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12926,13 +12599,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (253 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12944,13 +12617,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (254 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
 /* ------------------------------ */
     .balign 128
@@ -12962,13 +12635,13 @@
  * handler.    Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC()
     la     ra, artMterpAsmInstructionStart + (255 * 128)   # Addr of primary handler
     lw     rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)           # refresh IBASE
     move   a0, rSELF                    # arg0
     addu   a1, rFP, OFF_FP_SHADOWFRAME  # arg1
-    la     a2, MterpCheckBefore
-    jalr   zero, a2                     # Tail call to Mterp(self, shadow_frame)
+    move   a2, rPC
+    la     t9, MterpCheckBefore
+    jalr   zero, t9                     # Tail call to Mterp(self, shadow_frame, dex_pc_ptr)
 
     .balign 128
     .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
@@ -13089,20 +12762,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
  */
-MterpCheckSuspendAndContinue:
-    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)  # refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
     and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, 1f
+    bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC()
     move    a0, rSELF
     JAL(MterpSuspendCheck)              # (self)
     bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
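
For readers who don't speak MIPS, the control flow that the new MterpCommonTakenBranchNoFlags block and its .L_* helpers above implement can be summarized in C. This is only an illustrative sketch: JIT_CHECK_OSR and the MterpAddHotnessBatch / MterpMaybeDoOnStackReplacement / MterpSuspendCheck helpers are the symbols named in the assembly, but every type, every signature, and the resume_at()/suspend_pending() helpers are stand-ins invented here for illustration, not the runtime's real API.

    /*
     * Illustrative C sketch of the taken-branch handling above -- not part of
     * the patch.  Types, signatures and resume_at()/suspend_pending() are
     * hypothetical stand-ins; only the helper names mirror the assembly.
     */
    #include <stdbool.h>
    #include <stdint.h>

    #define JIT_CHECK_OSR (-1)

    typedef struct Thread Thread;
    typedef struct ShadowFrame ShadowFrame;
    typedef struct ArtMethod ArtMethod;

    extern int32_t MterpAddHotnessBatch(ArtMethod* m, ShadowFrame* sf, Thread* self);
    extern bool    MterpMaybeDoOnStackReplacement(Thread* self, ShadowFrame* sf, int32_t off);
    extern bool    MterpSuspendCheck(Thread* self);
    extern bool    suspend_pending(Thread* self);      /* THREAD_FLAGS test      */
    extern void    resume_at(int32_t dex_pc_delta);    /* advance rPC, dispatch  */

    /* rPROFILE: hotness countdown; offset: signed branch offset in code units. */
    static void taken_branch(Thread* self, ShadowFrame* sf, ArtMethod* m,
                             int32_t* rPROFILE, int32_t offset) {
      if (offset <= 0) {                              /* backward branch          */
        if (*rPROFILE > 0 && --*rPROFILE == 0) {      /* hotness counted to zero  */
          *rPROFILE = MterpAddHotnessBatch(m, sf, self);      /* .L_add_batch     */
        }
        if (*rPROFILE == JIT_CHECK_OSR &&             /* .L_osr_check             */
            MterpMaybeDoOnStackReplacement(self, sf, offset)) {
          return;                                     /* MterpOnStackReplacement  */
        }
        if (suspend_pending(self) && MterpSuspendCheck(self)) {
          return;                                     /* MterpFallback            */
        }
      } else if (*rPROFILE == JIT_CHECK_OSR &&        /* forward: OSR check only  */
                 MterpMaybeDoOnStackReplacement(self, sf, offset)) {
        return;
      }
      resume_at(offset);                              /* keep interpreting        */
    }
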
 
@@ -13149,6 +12912,26 @@
     sw      v1, 4(a2)
     li      v0, 1                       # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
 /* Restore from the stack and return. Frame size = STACK_SIZE */
     STACK_LOAD_FULL()
     jalr    zero, ra
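
The epilogue added to MterpDone above applies the same idea at method exit: a still-positive countdown means there are locally counted hits that were never flushed, so they are reported before the frame is torn down. Again a hedged sketch, reusing the hypothetical stand-in declarations from the previous sketch rather than the runtime's actual signatures:

    /* Illustrative sketch of the MterpDone / MterpProfileActive epilogue above
     * (depends on the stand-in declarations from the previous sketch).
     * rPROFILE <= 0 means profiling is off or in an OSR-check state, so there
     * is nothing to flush; otherwise write the countdown back into the shadow
     * frame and batch-report it before returning the interpreter's result.   */
    static int32_t mterp_done(Thread* self, ShadowFrame* sf, ArtMethod* m,
                              int32_t rPROFILE, int32_t result) {
      if (rPROFILE > 0) {
        /* sh rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1) + JAL above   */
        MterpAddHotnessBatch(m, sf, self);
      }
      return result;      /* STACK_LOAD_FULL() + jalr zero, ra                 */
    }
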
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index a17252b..a061f1e 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -58,16 +58,18 @@
   s3  rINST     first 16-bit code unit of current instruction
   s4  rIBASE    interpreted instruction base pointer, used for computed goto
   s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
 */
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     s0
-#define rFP     s1
-#define rSELF   s2
-#define rINST   s3
-#define rIBASE  s4
-#define rREFS   s5
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
 
 /*
  * This is a #include, not a %include, because we want the C pre-processor
@@ -87,7 +89,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -128,6 +130,17 @@
 .endm
 
 /*
+ * Fetch the next instruction from an offset specified by _reg and advance rPC
+ * to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
  * Fetch the next instruction from the specified offset.  Advances rPC
  * to point to the next instruction.
  *
@@ -274,7 +287,8 @@
 #define STACK_OFFSET_S3 40
 #define STACK_OFFSET_S4 48
 #define STACK_OFFSET_S5 56
-#define STACK_SIZE      64
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
 
 /* Constants for float/double_to_int/long conversions */
 #define INT_MIN             0x80000000
@@ -344,6 +358,8 @@
     .cfi_rel_offset 20, STACK_OFFSET_S4
     sd      s5, STACK_OFFSET_S5(sp)
     .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
 
     /* Remember the return register */
     sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
@@ -364,6 +380,12 @@
     /* Starting ibase */
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GET_INST_OPCODE v0
@@ -1100,24 +1122,9 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    .extern MterpProfileBranch
     srl     rINST, rINST, 8
     seb     rINST, rINST                # rINST <- offset (sign-extended AA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1130,23 +1137,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1162,25 +1154,10 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
     ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1206,21 +1183,7 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoPackedSwitch                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1247,21 +1210,7 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoSparseSwitch                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1453,22 +1402,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    beqc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    beqc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
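
Each rewritten conditional branch handler (if-eq above, and if-ne/if-lt/if-ge/if-gt/if-le plus the zero-compare forms below) now follows one pattern: the taken path funnels into MterpCommonTakenBranchNoFlags, while the not-taken path only needs an OSR probe when rPROFILE is in the JIT_CHECK_OSR state, since every not-taken Dalvik conditional branch advances by 2 code units. A sketch of that pattern, using the same hypothetical stand-ins as the earlier sketches:

    /* Illustrative sketch of the new if-eq pattern above (stand-ins as before). */
    static void op_if_eq(Thread* self, ShadowFrame* sf, ArtMethod* m,
                         int32_t* rPROFILE, int32_t vA, int32_t vB, int32_t offset) {
      if (vA == vB) {
        taken_branch(self, sf, m, rPROFILE, offset);  /* MterpCommonTakenBranchNoFlags */
        return;
      }
      if (*rPROFILE == JIT_CHECK_OSR &&               /* .L_check_not_taken_osr   */
          MterpMaybeDoOnStackReplacement(self, sf, 2)) {
        return;                                       /* MterpOnStackReplacement  */
      }
      resume_at(2);                                   /* FETCH_ADVANCE_INST 2     */
    }
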
 
@@ -1492,22 +1429,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bnec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bnec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1531,22 +1456,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bltc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bltc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1570,22 +1483,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bgec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1609,22 +1510,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bgtc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgtc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1648,22 +1537,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    blec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    blec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1681,26 +1558,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    beqzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    beqzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1718,26 +1582,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bnezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bnezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1755,26 +1606,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bltzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bltzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1792,26 +1630,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bgezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1829,26 +1654,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bgtzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgtzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1866,26 +1678,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    blezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    blezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -7204,10 +7003,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: mips64/op_unused_f3.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
 .L_op_unused_f4: /* 0xf4 */
@@ -7221,34 +7025,59 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: mips64/op_unused_f5.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: mips64/op_unused_f6.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: mips64/op_unused_f7.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: mips64/op_unused_f8.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: mips64/op_unused_f9.S */
+/* File: mips64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     b       MterpFallback
 
+
 /* ------------------------------ */
     .balign 128
 .L_op_unused_fa: /* 0xfa */
@@ -7391,14 +7220,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (0 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7410,14 +7239,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (1 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7429,14 +7258,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (2 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7448,14 +7277,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (3 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7467,14 +7296,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (4 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7486,14 +7315,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (5 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7505,14 +7334,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (6 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7524,14 +7353,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (7 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7543,14 +7372,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (8 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7562,14 +7391,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (9 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7581,14 +7410,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (10 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7600,14 +7429,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (11 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7619,14 +7448,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (12 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7638,14 +7467,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (13 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7657,14 +7486,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (14 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7676,14 +7505,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (15 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7695,14 +7524,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (16 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7714,14 +7543,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (17 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7733,14 +7562,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (18 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7752,14 +7581,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (19 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7771,14 +7600,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (20 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7790,14 +7619,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (21 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7809,14 +7638,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (22 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7828,14 +7657,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (23 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7847,14 +7676,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (24 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7866,14 +7695,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (25 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7885,14 +7714,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (26 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7904,14 +7733,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (27 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7923,14 +7752,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (28 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7942,14 +7771,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (29 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7961,14 +7790,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (30 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7980,14 +7809,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (31 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -7999,14 +7828,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (32 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8018,14 +7847,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (33 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8037,14 +7866,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (34 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8056,14 +7885,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (35 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8075,14 +7904,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (36 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8094,14 +7923,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (37 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8113,14 +7942,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (38 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8132,14 +7961,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (39 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8151,14 +7980,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (40 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8170,14 +7999,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (41 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8189,14 +8018,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (42 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8208,14 +8037,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (43 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8227,14 +8056,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (44 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8246,14 +8075,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (45 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8265,14 +8094,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (46 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8284,14 +8113,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (47 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8303,14 +8132,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (48 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8322,14 +8151,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (49 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8341,14 +8170,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (50 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8360,14 +8189,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (51 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8379,14 +8208,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (52 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8398,14 +8227,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (53 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8417,14 +8246,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (54 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8436,14 +8265,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (55 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8455,14 +8284,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (56 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8474,14 +8303,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (57 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8493,14 +8322,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (58 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8512,14 +8341,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (59 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8531,14 +8360,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (60 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8550,14 +8379,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (61 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8569,14 +8398,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (62 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8588,14 +8417,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (63 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8607,14 +8436,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (64 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8626,14 +8455,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (65 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8645,14 +8474,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (66 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8664,14 +8493,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (67 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8683,14 +8512,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (68 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8702,14 +8531,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (69 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8721,14 +8550,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (70 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8740,14 +8569,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (71 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8759,14 +8588,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (72 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8778,14 +8607,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (73 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8797,14 +8626,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (74 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8816,14 +8645,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (75 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8835,14 +8664,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (76 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8854,14 +8683,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (77 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8873,14 +8702,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (78 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8892,14 +8721,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (79 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8911,14 +8740,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (80 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8930,14 +8759,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (81 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8949,14 +8778,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (82 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8968,14 +8797,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (83 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -8987,14 +8816,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (84 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9006,14 +8835,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (85 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9025,14 +8854,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (86 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9044,14 +8873,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (87 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9063,14 +8892,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (88 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9082,14 +8911,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (89 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9101,14 +8930,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (90 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9120,14 +8949,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (91 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9139,14 +8968,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (92 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9158,14 +8987,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (93 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9177,14 +9006,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (94 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9196,14 +9025,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (95 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9215,14 +9044,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (96 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9234,14 +9063,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (97 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9253,14 +9082,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (98 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9272,14 +9101,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (99 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9291,14 +9120,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (100 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9310,14 +9139,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (101 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9329,14 +9158,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (102 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9348,14 +9177,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (103 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9367,14 +9196,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (104 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9386,14 +9215,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (105 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9405,14 +9234,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (106 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9424,14 +9253,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (107 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9443,14 +9272,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (108 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9462,14 +9291,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (109 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9481,14 +9310,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (110 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9500,14 +9329,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (111 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9519,14 +9348,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (112 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9538,14 +9367,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (113 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9557,14 +9386,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (114 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9576,14 +9405,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (115 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9595,14 +9424,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (116 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9614,14 +9443,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (117 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9633,14 +9462,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (118 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9652,14 +9481,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (119 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9671,14 +9500,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (120 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9690,14 +9519,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (121 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9709,14 +9538,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (122 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9728,14 +9557,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (123 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9747,14 +9576,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (124 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9766,14 +9595,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (125 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9785,14 +9614,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (126 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9804,14 +9633,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (127 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9823,14 +9652,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (128 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9842,14 +9671,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (129 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9861,14 +9690,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (130 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9880,14 +9709,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (131 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9899,14 +9728,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (132 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9918,14 +9747,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (133 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9937,14 +9766,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (134 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9956,14 +9785,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (135 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9975,14 +9804,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (136 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -9994,14 +9823,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (137 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10013,14 +9842,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (138 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10032,14 +9861,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (139 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10051,14 +9880,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (140 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10070,14 +9899,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (141 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10089,14 +9918,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (142 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10108,14 +9937,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (143 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10127,14 +9956,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (144 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10146,14 +9975,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (145 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10165,14 +9994,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (146 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10184,14 +10013,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (147 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10203,14 +10032,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (148 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10222,14 +10051,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (149 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10241,14 +10070,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (150 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10260,14 +10089,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (151 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10279,14 +10108,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (152 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10298,14 +10127,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (153 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10317,14 +10146,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (154 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10336,14 +10165,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (155 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10355,14 +10184,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (156 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10374,14 +10203,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (157 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10393,14 +10222,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (158 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10412,14 +10241,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (159 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10431,14 +10260,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (160 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10450,14 +10279,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (161 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10469,14 +10298,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (162 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10488,14 +10317,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (163 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10507,14 +10336,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (164 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10526,14 +10355,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (165 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10545,14 +10374,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (166 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10564,14 +10393,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (167 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10583,14 +10412,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (168 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10602,14 +10431,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (169 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10621,14 +10450,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (170 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10640,14 +10469,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (171 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10659,14 +10488,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (172 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10678,14 +10507,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (173 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10697,14 +10526,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (174 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10716,14 +10545,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (175 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10735,14 +10564,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (176 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10754,14 +10583,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (177 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10773,14 +10602,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (178 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10792,14 +10621,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (179 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10811,14 +10640,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (180 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10830,14 +10659,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (181 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10849,14 +10678,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (182 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10868,14 +10697,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (183 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10887,14 +10716,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (184 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10906,14 +10735,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (185 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10925,14 +10754,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (186 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10944,14 +10773,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (187 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10963,14 +10792,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (188 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -10982,14 +10811,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (189 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11001,14 +10830,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (190 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11020,14 +10849,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (191 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11039,14 +10868,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (192 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11058,14 +10887,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (193 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11077,14 +10906,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (194 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11096,14 +10925,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (195 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11115,14 +10944,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (196 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11134,14 +10963,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (197 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11153,14 +10982,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (198 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11172,14 +11001,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (199 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11191,14 +11020,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (200 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11210,14 +11039,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (201 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11229,14 +11058,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (202 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11248,14 +11077,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (203 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11267,14 +11096,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (204 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11286,14 +11115,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (205 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11305,14 +11134,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (206 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11324,14 +11153,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (207 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11343,14 +11172,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (208 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11362,14 +11191,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (209 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11381,14 +11210,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (210 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11400,14 +11229,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (211 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11419,14 +11248,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (212 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11438,14 +11267,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (213 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11457,14 +11286,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (214 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11476,14 +11305,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (215 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11495,14 +11324,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (216 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11514,14 +11343,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (217 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11533,14 +11362,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (218 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11552,14 +11381,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (219 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11571,14 +11400,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (220 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11590,14 +11419,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (221 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11609,14 +11438,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (222 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11628,14 +11457,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (223 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11647,14 +11476,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (224 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11666,14 +11495,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (225 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11685,14 +11514,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (226 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11704,14 +11533,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (227 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11723,14 +11552,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (228 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11742,14 +11571,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (229 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11761,14 +11590,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (230 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11780,14 +11609,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (231 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11799,14 +11628,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (232 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11818,14 +11647,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (233 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11837,14 +11666,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (234 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11856,14 +11685,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (235 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11875,14 +11704,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (236 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11894,14 +11723,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (237 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11913,14 +11742,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (238 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11932,14 +11761,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (239 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11951,14 +11780,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (240 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11970,14 +11799,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (241 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -11989,18 +11818,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (242 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12008,14 +11837,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (243 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12027,18 +11856,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (244 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12046,18 +11875,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (245 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12065,18 +11894,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (246 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12084,18 +11913,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (247 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12103,18 +11932,18 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (248 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12122,14 +11951,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (249 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12141,14 +11970,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (250 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12160,14 +11989,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (251 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12179,14 +12008,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (252 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12198,14 +12027,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (253 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12217,14 +12046,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (254 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
 /* ------------------------------ */
     .balign 128
@@ -12236,14 +12065,14 @@
  * handler.  Note that the call to MterpCheckBefore is done as a tail call.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     dla     ra, artMterpAsmInstructionStart
     dla     t9, MterpCheckBefore
     move    a0, rSELF
     daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rPC
     daddu   ra, ra, (255 * 128)            # Addr of primary handler.
-    jalr    zero, t9                            # (self, shadow_frame) Note: tail call.
+    jalr    zero, t9                            # (self, shadow_frame, dex_pc_ptr) Note: tail call.
 
     .balign 128
     .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
@@ -12323,23 +12152,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in ra.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-    .extern MterpSuspendCheck
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     REFRESH_IBASE
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, check1
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
-check1:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
     EXPORT_PC
     move    a0, rSELF
-    jal     MterpSuspendCheck                       # (self)
-    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal MterpMaybeDoOnStackReplacement  # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
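To make the control flow of MterpCommonTakenBranchNoFlags easier to follow, here is the same logic restated as C++. It is a reading aid only: the helper signatures mirror the calls commented in the assembly, SuspendOrCheckpointRequested is a hypothetical stand-in for the THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST flag test, and the JIT_CHECK_OSR value of -1 is inferred from the "comparison with -1" note above. Forward branches skip the hotness countdown, presumably because only backward branches can close loops.

    #include <cstdint>
    class Thread; class ShadowFrame; class ArtMethod;
    bool MterpMaybeDoOnStackReplacement(Thread*, ShadowFrame*, int32_t offset);
    bool MterpSuspendCheck(Thread*);
    int16_t MterpAddHotnessBatch(ArtMethod*, ShadowFrame*, Thread*);
    bool SuspendOrCheckpointRequested(Thread*);  // assumed helper for the flag test

    constexpr int16_t kJitCheckOsr = -1;  // JIT_CHECK_OSR

    // Returns true when execution must leave mterp (OSR entry or fallback);
    // returning false corresponds to resuming at the branch target.
    bool OnTakenBranch(Thread* self, ShadowFrame* frame, ArtMethod* method,
                       int32_t offset /* rINST */, int16_t& hotness /* rPROFILE */) {
      if (offset > 0) {                                  // forward branch: no hotness accounting
        if (hotness == kJitCheckOsr &&
            MterpMaybeDoOnStackReplacement(self, frame, offset)) {
          return true;                                   // MterpOnStackReplacement
        }
        return false;
      }
      // Taken backward branch.
      if (hotness > kJitCheckOsr && --hotness == 0) {    // active countdown just hit zero
        hotness = MterpAddHotnessBatch(method, frame, self);  // report batch, reload countdown
      }
      if (hotness == kJitCheckOsr &&
          MterpMaybeDoOnStackReplacement(self, frame, offset)) {
        return true;                                     // MterpOnStackReplacement
      }
      if (SuspendOrCheckpointRequested(self) && MterpSuspendCheck(self)) {
        return true;                                     // MterpFallback
      }
      return false;
    }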
 
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
@@ -12395,6 +12311,28 @@
 check2:
     li      v0, 1                                   # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
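The same exit-time bookkeeping, restated: a positive rPROFILE at MterpDone means hotness counts accumulated since the last batch report have not yet been handed over, so they are written back into the shadow frame and flushed through MterpAddHotnessBatch before returning. A small sketch under the same assumptions as above, where StoreHotnessCountdown is a hypothetical stand-in for the sh into the shadow frame's countdown field:

    #include <cstdint>
    class Thread; class ShadowFrame; class ArtMethod;
    int16_t MterpAddHotnessBatch(ArtMethod*, ShadowFrame*, Thread*);
    void StoreHotnessCountdown(ShadowFrame*, int16_t);  // hypothetical helper

    int MterpDoneSketch(int result /* v0 */, int16_t hotness /* rPROFILE */,
                        Thread* self, ShadowFrame* frame, ArtMethod* method) {
      if (hotness > 0) {  // negative means profiling disabled or OSR-check mode; zero never persists here
        StoreHotnessCountdown(frame, hotness);
        MterpAddHotnessBatch(method, frame, self);
      }
      return result;      // the assembly stashes v0 in rINST around the call for the same reason
    }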
+
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
     ld      s5, STACK_OFFSET_S5(sp)
     .cfi_restore 21
     ld      s4, STACK_OFFSET_S4(sp)
@@ -12421,5 +12359,6 @@
     .cfi_adjust_cfa_offset -STACK_SIZE
 
     .cfi_endproc
+    .set    reorder
     .size ExecuteMterpImpl, .-ExecuteMterpImpl
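
The MIPS64 handlers above keep the JIT hotness countdown in rPROFILE: taken branches decrement it, a zero result triggers a batched report through MterpAddHotnessBatch, and the JIT_CHECK_OSR sentinel routes through MterpMaybeDoOnStackReplacement. The C++ sketch below restates that flow for readability only; the function names come from the assembly, but the stand-in types, the sentinel value, and the exact signatures are assumptions, not the real runtime declarations.

    #include <cstdint>

    // Illustrative stand-ins for the real runtime types (assumptions).
    struct Thread;
    struct ShadowFrame;
    struct ArtMethod;

    // Shapes follow the "(method, shadow_frame, self)" and
    // "(self, shadow_frame, offset)" comments in the assembly; the exact
    // prototypes in the runtime may differ.
    extern "C" int16_t MterpAddHotnessBatch(ArtMethod*, ShadowFrame*, Thread*);
    extern "C" bool MterpMaybeDoOnStackReplacement(Thread*, ShadowFrame*, int32_t);

    constexpr int16_t JIT_CHECK_OSR = -1;  // assumed sentinel value

    // Mirrors .L_resume_backward_branch / .L_add_batch / .L_osr_check:
    // count the branch, batch-report when the countdown hits zero, then
    // probe for on-stack replacement when rPROFILE holds the OSR sentinel.
    // Returns true when control should transfer to OSR-compiled code.
    inline bool ProfileTakenBranch(Thread* self, ShadowFrame* sf, ArtMethod* m,
                                   int16_t& rPROFILE, int32_t offset) {
      if (rPROFILE > 0 && --rPROFILE == 0) {
        rPROFILE = MterpAddHotnessBatch(m, sf, self);  // refresh countdown
      }
      if (rPROFILE == JIT_CHECK_OSR) {
        return MterpMaybeDoOnStackReplacement(self, sf, offset);
      }
      return false;  // keep interpreting
    }

MterpDone applies the same idea in reverse: any countdown still pending in rPROFILE (greater than zero) is flushed with one final MterpAddHotnessBatch call before the saved registers are restored.
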
 
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index f78e1bc..29ee248 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -6201,8 +6201,12 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86/op_unused_f3.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -6219,36 +6223,56 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86/op_unused_f5.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86/op_unused_f6.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86/op_unused_f7.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86/op_unused_f8.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86/op_unused_f9.S */
+/* File: x86/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -6359,13 +6383,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(0*128)
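
From this point on, every hunk in mterp_x86.S makes the same change to the alternate-opcode ("AltOp") instrumentation stubs: the EXPORT_PC macro is dropped and rPC is passed as a third argument instead, so MterpCheckBefore receives the dex PC pointer directly. A minimal C++ sketch of the hook shape these call sites now assume follows; only the (self, shadow_frame, dex_pc_ptr) argument order is taken from the assembly comments, while the stand-in types and the setter are illustrative assumptions.

    #include <cstdint>

    // Illustrative stand-ins for the real runtime types (assumptions).
    struct Thread {};
    struct ShadowFrame {
      const uint16_t* dex_pc_ptr_ = nullptr;
      void SetDexPCPtr(const uint16_t* pc) { dex_pc_ptr_ = pc; }  // assumed setter
    };

    // Sketch of the hook shape assumed by the updated call sites above.
    extern "C" void MterpCheckBefore(Thread* /*self*/, ShadowFrame* shadow_frame,
                                     const uint16_t* dex_pc_ptr) {
      // With rPC passed explicitly, the hook can record the current dex PC
      // itself, which is why the per-handler EXPORT_PC was removed.
      shadow_frame->SetDexPCPtr(dex_pc_ptr);
      // ... instrumentation / debugger checks elided ...
    }

The same substitution repeats for every remaining AltOp handler in this file, so only this first instance needs close review.
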
 
@@ -6384,13 +6407,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(1*128)
 
@@ -6409,13 +6431,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(2*128)
 
@@ -6434,13 +6455,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(3*128)
 
@@ -6459,13 +6479,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(4*128)
 
@@ -6484,13 +6503,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(5*128)
 
@@ -6509,13 +6527,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(6*128)
 
@@ -6534,13 +6551,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(7*128)
 
@@ -6559,13 +6575,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(8*128)
 
@@ -6584,13 +6599,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(9*128)
 
@@ -6609,13 +6623,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(10*128)
 
@@ -6634,13 +6647,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(11*128)
 
@@ -6659,13 +6671,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(12*128)
 
@@ -6684,13 +6695,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(13*128)
 
@@ -6709,13 +6719,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(14*128)
 
@@ -6734,13 +6743,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(15*128)
 
@@ -6759,13 +6767,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(16*128)
 
@@ -6784,13 +6791,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(17*128)
 
@@ -6809,13 +6815,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(18*128)
 
@@ -6834,13 +6839,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(19*128)
 
@@ -6859,13 +6863,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(20*128)
 
@@ -6884,13 +6887,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(21*128)
 
@@ -6909,13 +6911,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(22*128)
 
@@ -6934,13 +6935,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(23*128)
 
@@ -6959,13 +6959,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(24*128)
 
@@ -6984,13 +6983,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(25*128)
 
@@ -7009,13 +7007,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(26*128)
 
@@ -7034,13 +7031,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(27*128)
 
@@ -7059,13 +7055,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(28*128)
 
@@ -7084,13 +7079,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(29*128)
 
@@ -7109,13 +7103,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(30*128)
 
@@ -7134,13 +7127,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(31*128)
 
@@ -7159,13 +7151,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(32*128)
 
@@ -7184,13 +7175,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(33*128)
 
@@ -7209,13 +7199,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(34*128)
 
@@ -7234,13 +7223,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(35*128)
 
@@ -7259,13 +7247,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(36*128)
 
@@ -7284,13 +7271,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(37*128)
 
@@ -7309,13 +7295,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(38*128)
 
@@ -7334,13 +7319,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(39*128)
 
@@ -7359,13 +7343,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(40*128)
 
@@ -7384,13 +7367,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(41*128)
 
@@ -7409,13 +7391,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(42*128)
 
@@ -7434,13 +7415,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(43*128)
 
@@ -7459,13 +7439,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(44*128)
 
@@ -7484,13 +7463,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(45*128)
 
@@ -7509,13 +7487,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(46*128)
 
@@ -7534,13 +7511,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(47*128)
 
@@ -7559,13 +7535,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(48*128)
 
@@ -7584,13 +7559,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(49*128)
 
@@ -7609,13 +7583,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(50*128)
 
@@ -7634,13 +7607,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(51*128)
 
@@ -7659,13 +7631,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(52*128)
 
@@ -7684,13 +7655,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(53*128)
 
@@ -7709,13 +7679,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(54*128)
 
@@ -7734,13 +7703,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(55*128)
 
@@ -7759,13 +7727,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(56*128)
 
@@ -7784,13 +7751,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(57*128)
 
@@ -7809,13 +7775,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(58*128)
 
@@ -7834,13 +7799,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(59*128)
 
@@ -7859,13 +7823,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(60*128)
 
@@ -7884,13 +7847,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(61*128)
 
@@ -7909,13 +7871,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(62*128)
 
@@ -7934,13 +7895,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(63*128)
 
@@ -7959,13 +7919,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(64*128)
 
@@ -7984,13 +7943,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(65*128)
 
@@ -8009,13 +7967,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(66*128)
 
@@ -8034,13 +7991,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(67*128)
 
@@ -8059,13 +8015,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(68*128)
 
@@ -8084,13 +8039,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(69*128)
 
@@ -8109,13 +8063,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(70*128)
 
@@ -8134,13 +8087,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(71*128)
 
@@ -8159,13 +8111,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(72*128)
 
@@ -8184,13 +8135,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(73*128)
 
@@ -8209,13 +8159,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(74*128)
 
@@ -8234,13 +8183,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(75*128)
 
@@ -8259,13 +8207,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(76*128)
 
@@ -8284,13 +8231,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(77*128)
 
@@ -8309,13 +8255,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(78*128)
 
@@ -8334,13 +8279,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(79*128)
 
@@ -8359,13 +8303,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(80*128)
 
@@ -8384,13 +8327,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(81*128)
 
@@ -8409,13 +8351,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(82*128)
 
@@ -8434,13 +8375,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(83*128)
 
@@ -8459,13 +8399,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(84*128)
 
@@ -8484,13 +8423,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(85*128)
 
@@ -8509,13 +8447,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(86*128)
 
@@ -8534,13 +8471,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(87*128)
 
@@ -8559,13 +8495,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(88*128)
 
@@ -8584,13 +8519,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(89*128)
 
@@ -8609,13 +8543,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(90*128)
 
@@ -8634,13 +8567,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(91*128)
 
@@ -8659,13 +8591,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(92*128)
 
@@ -8684,13 +8615,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(93*128)
 
@@ -8709,13 +8639,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(94*128)
 
@@ -8734,13 +8663,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(95*128)
 
@@ -8759,13 +8687,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(96*128)
 
@@ -8784,13 +8711,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(97*128)
 
@@ -8809,13 +8735,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(98*128)
 
@@ -8834,13 +8759,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(99*128)
 
@@ -8859,13 +8783,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(100*128)
 
@@ -8884,13 +8807,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(101*128)
 
@@ -8909,13 +8831,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(102*128)
 
@@ -8934,13 +8855,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(103*128)
 
@@ -8959,13 +8879,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(104*128)
 
@@ -8984,13 +8903,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(105*128)
 
@@ -9009,13 +8927,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(106*128)
 
@@ -9034,13 +8951,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(107*128)
 
@@ -9059,13 +8975,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(108*128)
 
@@ -9084,13 +8999,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(109*128)
 
@@ -9109,13 +9023,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(110*128)
 
@@ -9134,13 +9047,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(111*128)
 
@@ -9159,13 +9071,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(112*128)
 
@@ -9184,13 +9095,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(113*128)
 
@@ -9209,13 +9119,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(114*128)
 
@@ -9234,13 +9143,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(115*128)
 
@@ -9259,13 +9167,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(116*128)
 
@@ -9284,13 +9191,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(117*128)
 
@@ -9309,13 +9215,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(118*128)
 
@@ -9334,13 +9239,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(119*128)
 
@@ -9359,13 +9263,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(120*128)
 
@@ -9384,13 +9287,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(121*128)
 
@@ -9409,13 +9311,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(122*128)
 
@@ -9434,13 +9335,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(123*128)
 
@@ -9459,13 +9359,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(124*128)
 
@@ -9484,13 +9383,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(125*128)
 
@@ -9509,13 +9407,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(126*128)
 
@@ -9534,13 +9431,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(127*128)
 
@@ -9559,13 +9455,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(128*128)
 
@@ -9584,13 +9479,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(129*128)
 
@@ -9609,13 +9503,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(130*128)
 
@@ -9634,13 +9527,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(131*128)
 
@@ -9659,13 +9551,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(132*128)
 
@@ -9684,13 +9575,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(133*128)
 
@@ -9709,13 +9599,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(134*128)
 
@@ -9734,13 +9623,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(135*128)
 
@@ -9759,13 +9647,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(136*128)
 
@@ -9784,13 +9671,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(137*128)
 
@@ -9809,13 +9695,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(138*128)
 
@@ -9834,13 +9719,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(139*128)
 
@@ -9859,13 +9743,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(140*128)
 
@@ -9884,13 +9767,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(141*128)
 
@@ -9909,13 +9791,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(142*128)
 
@@ -9934,13 +9815,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(143*128)
 
@@ -9959,13 +9839,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(144*128)
 
@@ -9984,13 +9863,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(145*128)
 
@@ -10009,13 +9887,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(146*128)
 
@@ -10034,13 +9911,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(147*128)
 
@@ -10059,13 +9935,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(148*128)
 
@@ -10084,13 +9959,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(149*128)
 
@@ -10109,13 +9983,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(150*128)
 
@@ -10134,13 +10007,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(151*128)
 
@@ -10159,13 +10031,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(152*128)
 
@@ -10184,13 +10055,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(153*128)
 
@@ -10209,13 +10079,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(154*128)
 
@@ -10234,13 +10103,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(155*128)
 
@@ -10259,13 +10127,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(156*128)
 
@@ -10284,13 +10151,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(157*128)
 
@@ -10309,13 +10175,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(158*128)
 
@@ -10334,13 +10199,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(159*128)
 
@@ -10359,13 +10223,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(160*128)
 
@@ -10384,13 +10247,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(161*128)
 
@@ -10409,13 +10271,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(162*128)
 
@@ -10434,13 +10295,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(163*128)
 
@@ -10459,13 +10319,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(164*128)
 
@@ -10484,13 +10343,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(165*128)
 
@@ -10509,13 +10367,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(166*128)
 
@@ -10534,13 +10391,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(167*128)
 
@@ -10559,13 +10415,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(168*128)
 
@@ -10584,13 +10439,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(169*128)
 
@@ -10609,13 +10463,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(170*128)
 
@@ -10634,13 +10487,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(171*128)
 
@@ -10659,13 +10511,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(172*128)
 
@@ -10684,13 +10535,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(173*128)
 
@@ -10709,13 +10559,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(174*128)
 
@@ -10734,13 +10583,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(175*128)
 
@@ -10759,13 +10607,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(176*128)
 
@@ -10784,13 +10631,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(177*128)
 
@@ -10809,13 +10655,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(178*128)
 
@@ -10834,13 +10679,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(179*128)
 
@@ -10859,13 +10703,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(180*128)
 
@@ -10884,13 +10727,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(181*128)
 
@@ -10909,13 +10751,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(182*128)
 
@@ -10934,13 +10775,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(183*128)
 
@@ -10959,13 +10799,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(184*128)
 
@@ -10984,13 +10823,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(185*128)
 
@@ -11009,13 +10847,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(186*128)
 
@@ -11034,13 +10871,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(187*128)
 
@@ -11059,13 +10895,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(188*128)
 
@@ -11084,13 +10919,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(189*128)
 
@@ -11109,13 +10943,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(190*128)
 
@@ -11134,13 +10967,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(191*128)
 
@@ -11159,13 +10991,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(192*128)
 
@@ -11184,13 +11015,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(193*128)
 
@@ -11209,13 +11039,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(194*128)
 
@@ -11234,13 +11063,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(195*128)
 
@@ -11259,13 +11087,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(196*128)
 
@@ -11284,13 +11111,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(197*128)
 
@@ -11309,13 +11135,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(198*128)
 
@@ -11334,13 +11159,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(199*128)
 
@@ -11359,13 +11183,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(200*128)
 
@@ -11384,13 +11207,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(201*128)
 
@@ -11409,13 +11231,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(202*128)
 
@@ -11434,13 +11255,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(203*128)
 
@@ -11459,13 +11279,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(204*128)
 
@@ -11484,13 +11303,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(205*128)
 
@@ -11509,13 +11327,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(206*128)
 
@@ -11534,13 +11351,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(207*128)
 
@@ -11559,13 +11375,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(208*128)
 
@@ -11584,13 +11399,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(209*128)
 
@@ -11609,13 +11423,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(210*128)
 
@@ -11634,13 +11447,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(211*128)
 
@@ -11659,13 +11471,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(212*128)
 
@@ -11684,13 +11495,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(213*128)
 
@@ -11709,13 +11519,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(214*128)
 
@@ -11734,13 +11543,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(215*128)
 
@@ -11759,13 +11567,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(216*128)
 
@@ -11784,13 +11591,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(217*128)
 
@@ -11809,13 +11615,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(218*128)
 
@@ -11834,13 +11639,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(219*128)
 
@@ -11859,13 +11663,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(220*128)
 
@@ -11884,13 +11687,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(221*128)
 
@@ -11909,13 +11711,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(222*128)
 
@@ -11934,13 +11735,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(223*128)
 
@@ -11959,13 +11759,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(224*128)
 
@@ -11984,13 +11783,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(225*128)
 
@@ -12009,13 +11807,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(226*128)
 
@@ -12034,13 +11831,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(227*128)
 
@@ -12059,13 +11855,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(228*128)
 
@@ -12084,13 +11879,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(229*128)
 
@@ -12109,13 +11903,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(230*128)
 
@@ -12134,13 +11927,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(231*128)
 
@@ -12159,13 +11951,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(232*128)
 
@@ -12184,13 +11975,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(233*128)
 
@@ -12209,13 +11999,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(234*128)
 
@@ -12234,13 +12023,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(235*128)
 
@@ -12259,13 +12047,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(236*128)
 
@@ -12284,13 +12071,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(237*128)
 
@@ -12309,13 +12095,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(238*128)
 
@@ -12334,13 +12119,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(239*128)
 
@@ -12359,13 +12143,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(240*128)
 
@@ -12384,13 +12167,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(241*128)
 
@@ -12409,19 +12191,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(242*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12434,13 +12215,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(243*128)
 
@@ -12459,19 +12239,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(244*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12484,19 +12263,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(245*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12509,19 +12287,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(246*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12534,19 +12311,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(247*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12559,19 +12335,18 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(248*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12584,13 +12359,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(249*128)
 
@@ -12609,13 +12383,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(250*128)
 
@@ -12634,13 +12407,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(251*128)
 
@@ -12659,13 +12431,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(252*128)
 
@@ -12684,13 +12455,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(253*128)
 
@@ -12709,13 +12479,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(254*128)
 
@@ -12734,13 +12503,12 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(255*128)
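
[Reviewer note on the x86 ALT-stub hunks above: every alternative handler drops the EXPORT_PC macro before the call and instead passes the current rPC as a third argument, so MterpCheckBefore now receives the dex pc pointer directly as (self, shadow_frame, dex_pc_ptr). A minimal C++ sketch of what the matching three-argument hook could look like; the stand-in types, member name, and return type are assumptions for illustration, not the declaration in runtime/interpreter/mterp/mterp.cc.]

    #include <cstdint>

    // Hypothetical stand-ins for the real ART types; illustration only.
    struct Thread;
    struct ShadowFrame { const uint16_t* dex_pc_ptr; };

    // Sketch of the new calling convention the stubs target: the dex pc is
    // handed over explicitly, so the assembly no longer has to EXPORT_PC
    // into the shadow frame before every MterpCheckBefore call.
    extern "C" void MterpCheckBefore(Thread* /*self*/,
                                     ShadowFrame* shadow_frame,
                                     const uint16_t* dex_pc_ptr) {
      // Assumed field; the hook records the pc itself before doing its
      // instrumentation / debugger checks.
      shadow_frame->dex_pc_ptr = dex_pc_ptr;
    }
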
 
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 031cec8..bc1abcc 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -965,8 +965,8 @@
 /* File: x86_64/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
-    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + BBBBbbbb*2
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     testb   %al, %al                        # 0 means an exception is thrown
@@ -1051,8 +1051,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoPackedSwitch)
     testl   %eax, %eax
@@ -1074,8 +1074,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoSparseSwitch)
     testl   %eax, %eax
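
[Reviewer note: the fill-array-data, packed-switch, and sparse-switch hunks above fix the same issue — the +BBBBBBBB payload offset read from the instruction stream is a signed 32-bit count of 16-bit code units, so it must be sign-extended (movslq) before being scaled by 2 and added to rPC; the old movl zero-extended it and would mis-compute the address for a backward-pointing payload. A small C++ sketch of the intended address arithmetic; the function and variable names are illustrative, not ART's.]

    #include <cstdint>

    // Compute the payload address the way the fixed code does:
    // read the signed 32-bit offset at 2(rPC) and scale by code units.
    const uint16_t* PayloadAddress(const uint16_t* pc) {
      // pc[1] is the low half, pc[2] the high half of the 32-bit offset
      // (little-endian), exactly what movslq 2(rPC) loads and sign-extends.
      int32_t offset = static_cast<int32_t>(
          pc[1] | (static_cast<uint32_t>(pc[2]) << 16));
      // Pointer arithmetic on uint16_t* scales by 2 bytes per unit, matching
      // leaq (rPC,%rcx,2); a negative offset now points backwards correctly
      // instead of wrapping to a bogus forward address.
      return pc + offset;
    }
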
@@ -5966,8 +5966,12 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_invoke_lambda: /* 0xf3 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f3: /* 0xf3 */
+/* File: x86_64/op_unused_f3.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
@@ -5984,36 +5988,56 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_capture_variable: /* 0xf5 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f5: /* 0xf5 */
+/* File: x86_64/op_unused_f5.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_create_lambda: /* 0xf6 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f6: /* 0xf6 */
+/* File: x86_64/op_unused_f6.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_liberate_variable: /* 0xf7 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f7: /* 0xf7 */
+/* File: x86_64/op_unused_f7.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_box_lambda: /* 0xf8 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f8: /* 0xf8 */
+/* File: x86_64/op_unused_f8.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unbox_lambda: /* 0xf9 */
-/* Transfer stub to alternate interpreter */
+.L_op_unused_f9: /* 0xf9 */
+/* File: x86_64/op_unused_f9.S */
+/* File: x86_64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
     jmp     MterpFallback
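
[Reviewer note: the 0xf3 and 0xf5–0xf9 handlers above retire the experimental lambda opcodes; each slot is renamed to op_unused_fX and, per the "Bail to reference interpreter to throw" comment, simply jumps to MterpFallback so the reference interpreter is the one that raises the error for an undefined opcode. A hedged C++ sketch of that fallback pattern — not ART's actual code, just the shape of the idea.]

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Stand-in for the reference interpreter's handling of an undefined
    // opcode; the real runtime would raise the appropriate Java error.
    [[noreturn]] void ReferenceInterpreterThrowUnused(uint8_t opcode) {
      std::fprintf(stderr, "unused opcode 0x%02x\n",
                   static_cast<unsigned>(opcode));
      std::abort();
    }

    // Equivalent of `jmp MterpFallback`: the fast interpreter does no work
    // for unused opcodes and punts to the reference interpreter to throw.
    void HandleUnusedOpcode(uint8_t opcode) {
      ReferenceInterpreterThrowUnused(opcode);
    }
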
 
 
@@ -6124,11 +6148,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(0*128)
 
 /* ------------------------------ */
@@ -6146,11 +6170,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(1*128)
 
 /* ------------------------------ */
@@ -6168,11 +6192,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(2*128)
 
 /* ------------------------------ */
@@ -6190,11 +6214,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(3*128)
 
 /* ------------------------------ */
@@ -6212,11 +6236,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(4*128)
 
 /* ------------------------------ */
@@ -6234,11 +6258,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(5*128)
 
 /* ------------------------------ */
@@ -6256,11 +6280,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(6*128)
 
 /* ------------------------------ */
@@ -6278,11 +6302,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(7*128)
 
 /* ------------------------------ */
@@ -6300,11 +6324,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(8*128)
 
 /* ------------------------------ */
@@ -6322,11 +6346,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(9*128)
 
 /* ------------------------------ */
@@ -6344,11 +6368,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(10*128)
 
 /* ------------------------------ */
@@ -6366,11 +6390,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(11*128)
 
 /* ------------------------------ */
@@ -6388,11 +6412,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(12*128)
 
 /* ------------------------------ */
@@ -6410,11 +6434,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(13*128)
 
 /* ------------------------------ */
@@ -6432,11 +6456,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(14*128)
 
 /* ------------------------------ */
@@ -6454,11 +6478,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(15*128)
 
 /* ------------------------------ */
@@ -6476,11 +6500,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(16*128)
 
 /* ------------------------------ */
@@ -6498,11 +6522,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(17*128)
 
 /* ------------------------------ */
@@ -6520,11 +6544,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(18*128)
 
 /* ------------------------------ */
@@ -6542,11 +6566,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(19*128)
 
 /* ------------------------------ */
@@ -6564,11 +6588,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(20*128)
 
 /* ------------------------------ */
@@ -6586,11 +6610,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(21*128)
 
 /* ------------------------------ */
@@ -6608,11 +6632,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(22*128)
 
 /* ------------------------------ */
@@ -6630,11 +6654,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(23*128)
 
 /* ------------------------------ */
@@ -6652,11 +6676,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(24*128)
 
 /* ------------------------------ */
@@ -6674,11 +6698,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(25*128)
 
 /* ------------------------------ */
@@ -6696,11 +6720,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(26*128)
 
 /* ------------------------------ */
@@ -6718,11 +6742,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(27*128)
 
 /* ------------------------------ */
@@ -6740,11 +6764,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(28*128)
 
 /* ------------------------------ */
@@ -6762,11 +6786,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(29*128)
 
 /* ------------------------------ */
@@ -6784,11 +6808,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(30*128)
 
 /* ------------------------------ */
@@ -6806,11 +6830,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(31*128)
 
 /* ------------------------------ */
@@ -6828,11 +6852,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(32*128)
 
 /* ------------------------------ */
@@ -6850,11 +6874,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(33*128)
 
 /* ------------------------------ */
@@ -6872,11 +6896,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(34*128)
 
 /* ------------------------------ */
@@ -6894,11 +6918,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(35*128)
 
 /* ------------------------------ */
@@ -6916,11 +6940,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(36*128)
 
 /* ------------------------------ */
@@ -6938,11 +6962,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(37*128)
 
 /* ------------------------------ */
@@ -6960,11 +6984,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(38*128)
 
 /* ------------------------------ */
@@ -6982,11 +7006,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(39*128)
 
 /* ------------------------------ */
@@ -7004,11 +7028,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(40*128)
 
 /* ------------------------------ */
@@ -7026,11 +7050,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(41*128)
 
 /* ------------------------------ */
@@ -7048,11 +7072,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(42*128)
 
 /* ------------------------------ */
@@ -7070,11 +7094,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(43*128)
 
 /* ------------------------------ */
@@ -7092,11 +7116,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(44*128)
 
 /* ------------------------------ */
@@ -7114,11 +7138,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(45*128)
 
 /* ------------------------------ */
@@ -7136,11 +7160,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(46*128)
 
 /* ------------------------------ */
@@ -7158,11 +7182,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(47*128)
 
 /* ------------------------------ */
@@ -7180,11 +7204,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(48*128)
 
 /* ------------------------------ */
@@ -7202,11 +7226,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(49*128)
 
 /* ------------------------------ */
@@ -7224,11 +7248,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(50*128)
 
 /* ------------------------------ */
@@ -7246,11 +7270,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(51*128)
 
 /* ------------------------------ */
@@ -7268,11 +7292,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(52*128)
 
 /* ------------------------------ */
@@ -7290,11 +7314,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(53*128)
 
 /* ------------------------------ */
@@ -7312,11 +7336,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(54*128)
 
 /* ------------------------------ */
@@ -7334,11 +7358,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(55*128)
 
 /* ------------------------------ */
@@ -7356,11 +7380,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(56*128)
 
 /* ------------------------------ */
@@ -7378,11 +7402,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(57*128)
 
 /* ------------------------------ */
@@ -7400,11 +7424,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(58*128)
 
 /* ------------------------------ */
@@ -7422,11 +7446,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(59*128)
 
 /* ------------------------------ */
@@ -7444,11 +7468,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(60*128)
 
 /* ------------------------------ */
@@ -7466,11 +7490,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(61*128)
 
 /* ------------------------------ */
@@ -7488,11 +7512,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(62*128)
 
 /* ------------------------------ */
@@ -7510,11 +7534,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(63*128)
 
 /* ------------------------------ */
@@ -7532,11 +7556,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(64*128)
 
 /* ------------------------------ */
@@ -7554,11 +7578,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(65*128)
 
 /* ------------------------------ */
@@ -7576,11 +7600,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(66*128)
 
 /* ------------------------------ */
@@ -7598,11 +7622,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(67*128)
 
 /* ------------------------------ */
@@ -7620,11 +7644,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(68*128)
 
 /* ------------------------------ */
@@ -7642,11 +7666,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(69*128)
 
 /* ------------------------------ */
@@ -7664,11 +7688,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(70*128)
 
 /* ------------------------------ */
@@ -7686,11 +7710,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(71*128)
 
 /* ------------------------------ */
@@ -7708,11 +7732,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(72*128)
 
 /* ------------------------------ */
@@ -7730,11 +7754,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(73*128)
 
 /* ------------------------------ */
@@ -7752,11 +7776,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(74*128)
 
 /* ------------------------------ */
@@ -7774,11 +7798,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(75*128)
 
 /* ------------------------------ */
@@ -7796,11 +7820,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(76*128)
 
 /* ------------------------------ */
@@ -7818,11 +7842,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(77*128)
 
 /* ------------------------------ */
@@ -7840,11 +7864,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(78*128)
 
 /* ------------------------------ */
@@ -7862,11 +7886,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(79*128)
 
 /* ------------------------------ */
@@ -7884,11 +7908,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(80*128)
 
 /* ------------------------------ */
@@ -7906,11 +7930,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(81*128)
 
 /* ------------------------------ */
@@ -7928,11 +7952,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(82*128)
 
 /* ------------------------------ */
@@ -7950,11 +7974,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(83*128)
 
 /* ------------------------------ */
@@ -7972,11 +7996,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(84*128)
 
 /* ------------------------------ */
@@ -7994,11 +8018,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(85*128)
 
 /* ------------------------------ */
@@ -8016,11 +8040,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(86*128)
 
 /* ------------------------------ */
@@ -8038,11 +8062,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(87*128)
 
 /* ------------------------------ */
@@ -8060,11 +8084,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(88*128)
 
 /* ------------------------------ */
@@ -8082,11 +8106,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(89*128)
 
 /* ------------------------------ */
@@ -8104,11 +8128,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(90*128)
 
 /* ------------------------------ */
@@ -8126,11 +8150,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(91*128)
 
 /* ------------------------------ */
@@ -8148,11 +8172,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(92*128)
 
 /* ------------------------------ */
@@ -8170,11 +8194,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(93*128)
 
 /* ------------------------------ */
@@ -8192,11 +8216,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(94*128)
 
 /* ------------------------------ */
@@ -8214,11 +8238,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(95*128)
 
 /* ------------------------------ */
@@ -8236,11 +8260,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(96*128)
 
 /* ------------------------------ */
@@ -8258,11 +8282,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(97*128)
 
 /* ------------------------------ */
@@ -8280,11 +8304,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(98*128)
 
 /* ------------------------------ */
@@ -8302,11 +8326,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(99*128)
 
 /* ------------------------------ */
@@ -8324,11 +8348,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(100*128)
 
 /* ------------------------------ */
@@ -8346,11 +8370,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(101*128)
 
 /* ------------------------------ */
@@ -8368,11 +8392,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(102*128)
 
 /* ------------------------------ */
@@ -8390,11 +8414,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(103*128)
 
 /* ------------------------------ */
@@ -8412,11 +8436,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(104*128)
 
 /* ------------------------------ */
@@ -8434,11 +8458,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(105*128)
 
 /* ------------------------------ */
@@ -8456,11 +8480,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(106*128)
 
 /* ------------------------------ */
@@ -8478,11 +8502,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(107*128)
 
 /* ------------------------------ */
@@ -8500,11 +8524,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(108*128)
 
 /* ------------------------------ */
@@ -8522,11 +8546,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(109*128)
 
 /* ------------------------------ */
@@ -8544,11 +8568,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(110*128)
 
 /* ------------------------------ */
@@ -8566,11 +8590,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(111*128)
 
 /* ------------------------------ */
@@ -8588,11 +8612,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(112*128)
 
 /* ------------------------------ */
@@ -8610,11 +8634,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(113*128)
 
 /* ------------------------------ */
@@ -8632,11 +8656,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(114*128)
 
 /* ------------------------------ */
@@ -8654,11 +8678,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(115*128)
 
 /* ------------------------------ */
@@ -8676,11 +8700,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(116*128)
 
 /* ------------------------------ */
@@ -8698,11 +8722,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(117*128)
 
 /* ------------------------------ */
@@ -8720,11 +8744,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(118*128)
 
 /* ------------------------------ */
@@ -8742,11 +8766,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(119*128)
 
 /* ------------------------------ */
@@ -8764,11 +8788,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(120*128)
 
 /* ------------------------------ */
@@ -8786,11 +8810,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(121*128)
 
 /* ------------------------------ */
@@ -8808,11 +8832,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(122*128)
 
 /* ------------------------------ */
@@ -8830,11 +8854,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(123*128)
 
 /* ------------------------------ */
@@ -8852,11 +8876,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(124*128)
 
 /* ------------------------------ */
@@ -8874,11 +8898,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(125*128)
 
 /* ------------------------------ */
@@ -8896,11 +8920,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(126*128)
 
 /* ------------------------------ */
@@ -8918,11 +8942,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(127*128)
 
 /* ------------------------------ */
@@ -8940,11 +8964,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(128*128)
 
 /* ------------------------------ */
@@ -8962,11 +8986,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(129*128)
 
 /* ------------------------------ */
@@ -8984,11 +9008,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(130*128)
 
 /* ------------------------------ */
@@ -9006,11 +9030,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(131*128)
 
 /* ------------------------------ */
@@ -9028,11 +9052,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(132*128)
 
 /* ------------------------------ */
@@ -9050,11 +9074,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(133*128)
 
 /* ------------------------------ */
@@ -9072,11 +9096,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(134*128)
 
 /* ------------------------------ */
@@ -9094,11 +9118,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(135*128)
 
 /* ------------------------------ */
@@ -9116,11 +9140,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(136*128)
 
 /* ------------------------------ */
@@ -9138,11 +9162,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(137*128)
 
 /* ------------------------------ */
@@ -9160,11 +9184,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(138*128)
 
 /* ------------------------------ */
@@ -9182,11 +9206,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(139*128)
 
 /* ------------------------------ */
@@ -9204,11 +9228,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(140*128)
 
 /* ------------------------------ */
@@ -9226,11 +9250,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(141*128)
 
 /* ------------------------------ */
@@ -9248,11 +9272,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(142*128)
 
 /* ------------------------------ */
@@ -9270,11 +9294,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(143*128)
 
 /* ------------------------------ */
@@ -9292,11 +9316,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(144*128)
 
 /* ------------------------------ */
@@ -9314,11 +9338,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(145*128)
 
 /* ------------------------------ */
@@ -9336,11 +9360,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(146*128)
 
 /* ------------------------------ */
@@ -9358,11 +9382,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(147*128)
 
 /* ------------------------------ */
@@ -9380,11 +9404,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(148*128)
 
 /* ------------------------------ */
@@ -9402,11 +9426,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(149*128)
 
 /* ------------------------------ */
@@ -9424,11 +9448,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(150*128)
 
 /* ------------------------------ */
@@ -9446,11 +9470,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(151*128)
 
 /* ------------------------------ */
@@ -9468,11 +9492,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(152*128)
 
 /* ------------------------------ */
@@ -9490,11 +9514,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(153*128)
 
 /* ------------------------------ */
@@ -9512,11 +9536,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(154*128)
 
 /* ------------------------------ */
@@ -9534,11 +9558,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(155*128)
 
 /* ------------------------------ */
@@ -9556,11 +9580,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(156*128)
 
 /* ------------------------------ */
@@ -9578,11 +9602,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(157*128)
 
 /* ------------------------------ */
@@ -9600,11 +9624,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(158*128)
 
 /* ------------------------------ */
@@ -9622,11 +9646,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(159*128)
 
 /* ------------------------------ */
@@ -9644,11 +9668,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(160*128)
 
 /* ------------------------------ */
@@ -9666,11 +9690,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(161*128)
 
 /* ------------------------------ */
@@ -9688,11 +9712,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(162*128)
 
 /* ------------------------------ */
@@ -9710,11 +9734,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(163*128)
 
 /* ------------------------------ */
@@ -9732,11 +9756,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(164*128)
 
 /* ------------------------------ */
@@ -9754,11 +9778,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(165*128)
 
 /* ------------------------------ */
@@ -9776,11 +9800,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(166*128)
 
 /* ------------------------------ */
@@ -9798,11 +9822,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(167*128)
 
 /* ------------------------------ */
@@ -9820,11 +9844,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(168*128)
 
 /* ------------------------------ */
@@ -9842,11 +9866,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(169*128)
 
 /* ------------------------------ */
@@ -9864,11 +9888,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(170*128)
 
 /* ------------------------------ */
@@ -9886,11 +9910,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(171*128)
 
 /* ------------------------------ */
@@ -9908,11 +9932,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(172*128)
 
 /* ------------------------------ */
@@ -9930,11 +9954,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(173*128)
 
 /* ------------------------------ */
@@ -9952,11 +9976,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(174*128)
 
 /* ------------------------------ */
@@ -9974,11 +9998,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(175*128)
 
 /* ------------------------------ */
@@ -9996,11 +10020,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(176*128)
 
 /* ------------------------------ */
@@ -10018,11 +10042,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(177*128)
 
 /* ------------------------------ */
@@ -10040,11 +10064,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(178*128)
 
 /* ------------------------------ */
@@ -10062,11 +10086,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(179*128)
 
 /* ------------------------------ */
@@ -10084,11 +10108,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(180*128)
 
 /* ------------------------------ */
@@ -10106,11 +10130,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(181*128)
 
 /* ------------------------------ */
@@ -10128,11 +10152,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(182*128)
 
 /* ------------------------------ */
@@ -10150,11 +10174,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(183*128)
 
 /* ------------------------------ */
@@ -10172,11 +10196,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(184*128)
 
 /* ------------------------------ */
@@ -10194,11 +10218,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(185*128)
 
 /* ------------------------------ */
@@ -10216,11 +10240,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(186*128)
 
 /* ------------------------------ */
@@ -10238,11 +10262,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(187*128)
 
 /* ------------------------------ */
@@ -10260,11 +10284,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(188*128)
 
 /* ------------------------------ */
@@ -10282,11 +10306,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(189*128)
 
 /* ------------------------------ */
@@ -10304,11 +10328,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(190*128)
 
 /* ------------------------------ */
@@ -10326,11 +10350,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(191*128)
 
 /* ------------------------------ */
@@ -10348,11 +10372,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(192*128)
 
 /* ------------------------------ */
@@ -10370,11 +10394,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(193*128)
 
 /* ------------------------------ */
@@ -10392,11 +10416,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(194*128)
 
 /* ------------------------------ */
@@ -10414,11 +10438,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(195*128)
 
 /* ------------------------------ */
@@ -10436,11 +10460,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(196*128)
 
 /* ------------------------------ */
@@ -10458,11 +10482,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(197*128)
 
 /* ------------------------------ */
@@ -10480,11 +10504,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(198*128)
 
 /* ------------------------------ */
@@ -10502,11 +10526,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(199*128)
 
 /* ------------------------------ */
@@ -10524,11 +10548,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(200*128)
 
 /* ------------------------------ */
@@ -10546,11 +10570,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(201*128)
 
 /* ------------------------------ */
@@ -10568,11 +10592,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(202*128)
 
 /* ------------------------------ */
@@ -10590,11 +10614,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(203*128)
 
 /* ------------------------------ */
@@ -10612,11 +10636,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(204*128)
 
 /* ------------------------------ */
@@ -10634,11 +10658,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(205*128)
 
 /* ------------------------------ */
@@ -10656,11 +10680,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(206*128)
 
 /* ------------------------------ */
@@ -10678,11 +10702,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(207*128)
 
 /* ------------------------------ */
@@ -10700,11 +10724,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(208*128)
 
 /* ------------------------------ */
@@ -10722,11 +10746,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(209*128)
 
 /* ------------------------------ */
@@ -10744,11 +10768,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(210*128)
 
 /* ------------------------------ */
@@ -10766,11 +10790,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(211*128)
 
 /* ------------------------------ */
@@ -10788,11 +10812,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(212*128)
 
 /* ------------------------------ */
@@ -10810,11 +10834,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(213*128)
 
 /* ------------------------------ */
@@ -10832,11 +10856,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(214*128)
 
 /* ------------------------------ */
@@ -10854,11 +10878,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(215*128)
 
 /* ------------------------------ */
@@ -10876,11 +10900,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(216*128)
 
 /* ------------------------------ */
@@ -10898,11 +10922,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(217*128)
 
 /* ------------------------------ */
@@ -10920,11 +10944,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(218*128)
 
 /* ------------------------------ */
@@ -10942,11 +10966,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(219*128)
 
 /* ------------------------------ */
@@ -10964,11 +10988,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(220*128)
 
 /* ------------------------------ */
@@ -10986,11 +11010,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(221*128)
 
 /* ------------------------------ */
@@ -11008,11 +11032,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(222*128)
 
 /* ------------------------------ */
@@ -11030,11 +11054,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(223*128)
 
 /* ------------------------------ */
@@ -11052,11 +11076,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(224*128)
 
 /* ------------------------------ */
@@ -11074,11 +11098,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(225*128)
 
 /* ------------------------------ */
@@ -11096,11 +11120,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(226*128)
 
 /* ------------------------------ */
@@ -11118,11 +11142,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(227*128)
 
 /* ------------------------------ */
@@ -11140,11 +11164,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(228*128)
 
 /* ------------------------------ */
@@ -11162,11 +11186,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(229*128)
 
 /* ------------------------------ */
@@ -11184,11 +11208,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(230*128)
 
 /* ------------------------------ */
@@ -11206,11 +11230,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(231*128)
 
 /* ------------------------------ */
@@ -11228,11 +11252,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(232*128)
 
 /* ------------------------------ */
@@ -11250,11 +11274,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(233*128)
 
 /* ------------------------------ */
@@ -11272,11 +11296,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(234*128)
 
 /* ------------------------------ */
@@ -11294,11 +11318,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(235*128)
 
 /* ------------------------------ */
@@ -11316,11 +11340,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(236*128)
 
 /* ------------------------------ */
@@ -11338,11 +11362,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(237*128)
 
 /* ------------------------------ */
@@ -11360,11 +11384,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(238*128)
 
 /* ------------------------------ */
@@ -11382,11 +11406,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(239*128)
 
 /* ------------------------------ */
@@ -11404,11 +11428,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(240*128)
 
 /* ------------------------------ */
@@ -11426,11 +11450,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(241*128)
 
 /* ------------------------------ */
@@ -11448,16 +11472,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(242*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_invoke_lambda: /* 0xf3 */
+.L_ALT_op_unused_f3: /* 0xf3 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11470,11 +11494,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(243*128)
 
 /* ------------------------------ */
@@ -11492,16 +11516,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(244*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_capture_variable: /* 0xf5 */
+.L_ALT_op_unused_f5: /* 0xf5 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11514,16 +11538,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(245*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_create_lambda: /* 0xf6 */
+.L_ALT_op_unused_f6: /* 0xf6 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11536,16 +11560,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(246*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_liberate_variable: /* 0xf7 */
+.L_ALT_op_unused_f7: /* 0xf7 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11558,16 +11582,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(247*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_box_lambda: /* 0xf8 */
+.L_ALT_op_unused_f8: /* 0xf8 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11580,16 +11604,16 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(248*128)
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unbox_lambda: /* 0xf9 */
+.L_ALT_op_unused_f9: /* 0xf9 */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11602,11 +11626,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(249*128)
 
 /* ------------------------------ */
@@ -11624,11 +11648,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(250*128)
 
 /* ------------------------------ */
@@ -11646,11 +11670,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(251*128)
 
 /* ------------------------------ */
@@ -11668,11 +11692,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(252*128)
 
 /* ------------------------------ */
@@ -11690,11 +11714,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(253*128)
 
 /* ------------------------------ */
@@ -11712,11 +11736,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(254*128)
 
 /* ------------------------------ */
@@ -11734,11 +11758,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(255*128)
 
     .balign 128
diff --git a/runtime/interpreter/mterp/x86/alt_stub.S b/runtime/interpreter/mterp/x86/alt_stub.S
index 5a91167..a5b39b8 100644
--- a/runtime/interpreter/mterp/x86/alt_stub.S
+++ b/runtime/interpreter/mterp/x86/alt_stub.S
@@ -9,12 +9,11 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
-
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %eax
     movl    %eax, OUT_ARG1(%esp)
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movl    rPC, OUT_ARG2(%esp)
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     REFRESH_IBASE
     jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
diff --git a/runtime/interpreter/mterp/x86/op_unused_f3.S b/runtime/interpreter/mterp/x86/op_unused_f3.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f5.S b/runtime/interpreter/mterp/x86/op_unused_f5.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f6.S b/runtime/interpreter/mterp/x86/op_unused_f6.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f7.S b/runtime/interpreter/mterp/x86/op_unused_f7.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f8.S b/runtime/interpreter/mterp/x86/op_unused_f8.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_f9.S b/runtime/interpreter/mterp/x86/op_unused_f9.S
new file mode 100644
index 0000000..31d98c1
--- /dev/null
+++ b/runtime/interpreter/mterp/x86/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/alt_stub.S b/runtime/interpreter/mterp/x86_64/alt_stub.S
index 6fcebbb..24cd1a8 100644
--- a/runtime/interpreter/mterp/x86_64/alt_stub.S
+++ b/runtime/interpreter/mterp/x86_64/alt_stub.S
@@ -9,9 +9,9 @@
  * return.
  */
     .extern MterpCheckBefore
-    EXPORT_PC
     REFRESH_IBASE
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame)
+    movq    rPC, OUT_ARG2
+    call    SYMBOL(MterpCheckBefore)        # (self, shadow_frame, dex_pc_ptr)
     jmp     .L_op_nop+(${opnum}*${handler_size_bytes})
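The alt stubs above no longer rely on EXPORT_PC before the call; they hand the current rPC to the handler as a third argument instead. A minimal sketch of what the C++ side of that calling convention looks like, assuming the declaration mirrors the (self, shadow_frame, dex_pc_ptr) comment in the stubs — the real prototype lives in runtime/interpreter/mterp/mterp.cc and is not part of this diff:

// Hypothetical declaration matching the three arguments the stubs now marshal.
// MterpCheckBefore can use dex_pc_ptr directly instead of a previously exported PC.
extern "C" void MterpCheckBefore(art::Thread* self,
                                 art::ShadowFrame* shadow_frame,
                                 uint16_t* dex_pc_ptr);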
diff --git a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
index 626bad4..7ea36a6 100644
--- a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
@@ -1,7 +1,7 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
-    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + BBBBbbbb*2
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     testb   %al, %al                        # 0 means an exception is thrown
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index fdf5a50..148552f 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -9,8 +9,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL($func)
     testl   %eax, %eax
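Both hunks above fix the same latent bug: the payload offset BBBBbbbb is a signed 32-bit value, so loading it with a plain movl zero-extends it in a 64-bit register, and a negative offset (payload placed before the instruction) would address the wrong memory; movslq sign-extends it before the leaq scaling. A small self-contained illustration of the difference (plain C++, not ART code):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t offset = -4;  // signed code-unit offset as read from the bytecode stream
  // What a 32-bit move leaves in a 64-bit register: upper 32 bits cleared.
  uint64_t zero_extended = static_cast<uint32_t>(offset);
  // What movslq produces: the sign is preserved.
  int64_t sign_extended = offset;
  // Scaling by 2, as leaq (rPC,%rcx,2) does, only lands before rPC when the sign survives.
  std::printf("zero-extended: %llu, sign-extended: %lld\n",
              static_cast<unsigned long long>(zero_extended) * 2,
              static_cast<long long>(sign_extended) * 2);
  return 0;
}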
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f3.S b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f5.S b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f6.S b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f7.S b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f8.S b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_f9.S b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
new file mode 100644
index 0000000..280615f
--- /dev/null
+++ b/runtime/interpreter/mterp/x86_64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "x86_64/unused.S"
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 1f473e4..a0e0e62 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -29,6 +29,7 @@
 
 #include "art_method-inl.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker.h"
@@ -40,6 +41,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
 #include "mirror/field-inl.h"
+#include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
@@ -300,11 +302,27 @@
                            PrettyDescriptor(klass).c_str());
     return;
   }
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Field::CreateFromArtField<true>(self, found, true));
+  Runtime* runtime = Runtime::Current();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Field* field;
+  if (runtime->IsActiveTransaction()) {
+    if (pointer_size == PointerSize::k64) {
+      field = mirror::Field::CreateFromArtField<PointerSize::k64, true>(
+          self, found, true);
+    } else {
+      field = mirror::Field::CreateFromArtField<PointerSize::k32, true>(
+          self, found, true);
+    }
   } else {
-    result->SetL(mirror::Field::CreateFromArtField<false>(self, found, true));
+    if (pointer_size == PointerSize::k64) {
+      field = mirror::Field::CreateFromArtField<PointerSize::k64, false>(
+          self, found, true);
+    } else {
+      field = mirror::Field::CreateFromArtField<PointerSize::k32, false>(
+          self, found, true);
+    }
   }
+  result->SetL(field);
 }
 
 // This is required for Enum(Set) code, as that uses reflection to inspect enum classes.
@@ -319,11 +337,28 @@
   mirror::String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
   mirror::ObjectArray<mirror::Class>* args =
       shadow_frame->GetVRegReference(arg_offset + 2)->AsObjectArray<mirror::Class>();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Class::GetDeclaredMethodInternal<true>(self, klass, name, args));
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Method* method;
+  if (transaction) {
+    if (pointer_size == PointerSize::k64) {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
+          self, klass, name, args);
+    } else {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
+          self, klass, name, args);
+    }
   } else {
-    result->SetL(mirror::Class::GetDeclaredMethodInternal<false>(self, klass, name, args));
+    if (pointer_size == PointerSize::k64) {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
+          self, klass, name, args);
+    } else {
+      method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
+          self, klass, name, args);
+    }
   }
+  result->SetL(method);
 }
 
 // Special managed code cut-out to allow constructor lookup in a un-started runtime.
@@ -336,11 +371,28 @@
   }
   mirror::ObjectArray<mirror::Class>* args =
       shadow_frame->GetVRegReference(arg_offset + 1)->AsObjectArray<mirror::Class>();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(mirror::Class::GetDeclaredConstructorInternal<true>(self, klass, args));
+  Runtime* runtime = Runtime::Current();
+  bool transaction = runtime->IsActiveTransaction();
+  PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+  mirror::Constructor* constructor;
+  if (transaction) {
+    if (pointer_size == PointerSize::k64) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k64,
+                                                                  true>(self, klass, args);
+    } else {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k32,
+                                                                  true>(self, klass, args);
+    }
   } else {
-    result->SetL(mirror::Class::GetDeclaredConstructorInternal<false>(self, klass, args));
+    if (pointer_size == PointerSize::k64) {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k64,
+                                                                  false>(self, klass, args);
+    } else {
+      constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k32,
+                                                                  false>(self, klass, args);
+    }
   }
+  result->SetL(constructor);
 }
 
 void UnstartedRuntime::UnstartedClassGetEnclosingClass(
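The three UnstartedRuntime helpers above now pick a PointerSize template instantiation at runtime instead of passing sizeof(void*). The dispatch pattern is identical in each case; a condensed sketch of it, where DispatchCreateField is illustrative only and `found` stands for the ArtField* resolved earlier in the function:

// Sketch of the runtime-value-to-template-argument dispatch used in the hunks above.
template <bool kTransaction>
mirror::Field* DispatchCreateField(Thread* self, ArtField* found, PointerSize pointer_size)
    SHARED_REQUIRES(Locks::mutator_lock_) {
  return (pointer_size == PointerSize::k64)
      ? mirror::Field::CreateFromArtField<PointerSize::k64, kTransaction>(self, found, true)
      : mirror::Field::CreateFromArtField<PointerSize::k32, kTransaction>(self, found, true);
}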
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index 814b001..7e1f795 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -20,6 +20,7 @@
 #include <locale>
 
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/memory_tool.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
@@ -383,7 +384,7 @@
   ScopedObjectAccess soa(self);
   mirror::Class* klass = mirror::String::GetJavaLangString();
   ArtMethod* method = klass->FindDeclaredDirectMethod("<init>", "(Ljava/lang/String;)V",
-                                                      sizeof(void*));
+                                                      kRuntimePointerSize);
 
   // create instruction data for invoke-direct {v0, v1} of method with fake index
   uint16_t inst_data[3] = { 0x2070, 0x0000, 0x0010 };
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index c644cde..2401bec 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -48,7 +48,7 @@
 static const size_t kWeakGlobalsInitial = 16;  // Arbitrary.
 static const size_t kWeakGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
 
-static bool IsBadJniVersion(int version) {
+bool JavaVMExt::IsBadJniVersion(int version) {
   // We don't support JNI_VERSION_1_1. These are the only other valid versions.
   return version != JNI_VERSION_1_2 && version != JNI_VERSION_1_4 && version != JNI_VERSION_1_6;
 }
@@ -344,13 +344,6 @@
   }
 
   static jint GetEnv(JavaVM* vm, void** env, jint version) {
-    // GetEnv always returns a JNIEnv* for the most current supported JNI version,
-    // and unlike other calls that take a JNI version doesn't care if you supply
-    // JNI_VERSION_1_1, which we don't otherwise support.
-    if (IsBadJniVersion(version) && version != JNI_VERSION_1_1) {
-      LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version;
-      return JNI_EVERSION;
-    }
     if (vm == nullptr || env == nullptr) {
       return JNI_ERR;
     }
@@ -359,8 +352,8 @@
       *env = nullptr;
       return JNI_EDETACHED;
     }
-    *env = thread->GetJniEnv();
-    return JNI_OK;
+    JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm);
+    return raw_vm->HandleGetEnv(env, version);
   }
 
  private:
@@ -388,7 +381,7 @@
     const char* thread_name = nullptr;
     jobject thread_group = nullptr;
     if (args != nullptr) {
-      if (IsBadJniVersion(args->version)) {
+      if (JavaVMExt::IsBadJniVersion(args->version)) {
         LOG(ERROR) << "Bad JNI version passed to "
                    << (as_daemon ? "AttachCurrentThreadAsDaemon" : "AttachCurrentThread") << ": "
                    << args->version;
@@ -436,7 +429,8 @@
       weak_globals_lock_("JNI weak global reference table lock", kJniWeakGlobalsLock),
       weak_globals_(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal),
       allow_accessing_weak_globals_(true),
-      weak_globals_add_condition_("weak globals add condition", weak_globals_lock_) {
+      weak_globals_add_condition_("weak globals add condition", weak_globals_lock_),
+      env_hooks_() {
   functions = unchecked_functions_;
   SetCheckJniEnabled(runtime_options.Exists(RuntimeArgumentMap::CheckJni));
 }
@@ -444,6 +438,26 @@
 JavaVMExt::~JavaVMExt() {
 }
 
+jint JavaVMExt::HandleGetEnv(/*out*/void** env, jint version) {
+  for (GetEnvHook hook : env_hooks_) {
+    jint res = hook(this, env, version);
+    if (res == JNI_OK) {
+      return JNI_OK;
+    } else if (res != JNI_EVERSION) {
+      LOG(ERROR) << "Error returned from a plugin GetEnv handler! " << res;
+      return res;
+    }
+  }
+  LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version;
+  return JNI_EVERSION;
+}
+
+// Add a hook to handle getting environments from the GetEnv call.
+void JavaVMExt::AddEnvironmentHook(GetEnvHook hook) {
+  CHECK(hook != nullptr) << "environment hooks shouldn't be null!";
+  env_hooks_.push_back(hook);
+}
+
 void JavaVMExt::JniAbort(const char* jni_function_name, const char* msg) {
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
@@ -866,7 +880,7 @@
 
     if (version == JNI_ERR) {
       StringAppendF(error_msg, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
-    } else if (IsBadJniVersion(version)) {
+    } else if (JavaVMExt::IsBadJniVersion(version)) {
       StringAppendF(error_msg, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d",
                     path.c_str(), version);
       // It's unwise to call dlclose() here, but we can mark it
@@ -939,7 +953,7 @@
 extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) {
   ScopedTrace trace(__FUNCTION__);
   const JavaVMInitArgs* args = static_cast<JavaVMInitArgs*>(vm_args);
-  if (IsBadJniVersion(args->version)) {
+  if (JavaVMExt::IsBadJniVersion(args->version)) {
     LOG(ERROR) << "Bad JNI version passed to CreateJavaVM: " << args->version;
     return JNI_EVERSION;
   }
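GetEnv now defers to JavaVMExt::HandleGetEnv, which walks env_hooks_ and reports JNI_EVERSION only if no hook claims the requested version. The behaviour the removed inline code provided (hand back the attached thread's JNIEnv for supported versions, tolerating JNI_VERSION_1_1) could itself be written as such a hook; a hedged sketch only, not necessarily how the runtime actually wires up its default:

// Hypothetical hook reproducing the old GetEnv fallback; whether an equivalent default
// hook is registered, and where, is outside this diff.
static jint DefaultGetEnvHook(JavaVMExt* vm ATTRIBUTE_UNUSED, /*out*/void** env, jint version) {
  // GetEnv historically accepted JNI_VERSION_1_1 even though attach/JNI_OnLoad do not.
  if (JavaVMExt::IsBadJniVersion(version) && version != JNI_VERSION_1_1) {
    return JNI_EVERSION;  // let HandleGetEnv try other hooks / report the bad version
  }
  *env = Thread::Current()->GetJniEnv();
  return JNI_OK;
}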
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 3d055cd..ed9d3ab 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -36,6 +36,10 @@
 class Runtime;
 struct RuntimeArgumentMap;
 
+class JavaVMExt;
+// Hook definition for runtime plugins.
+using GetEnvHook = jint (*)(JavaVMExt* vm, /*out*/void** new_env, jint version);
+
 class JavaVMExt : public JavaVM {
  public:
   JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options);
@@ -171,6 +175,12 @@
   void TrimGlobals() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!globals_lock_);
 
+  jint HandleGetEnv(/*out*/void** env, jint version);
+
+  void AddEnvironmentHook(GetEnvHook hook);
+
+  static bool IsBadJniVersion(int version);
+
  private:
   // Return true if self can currently access weak globals.
   bool MayAccessWeakGlobalsUnlocked(Thread* self) const SHARED_REQUIRES(Locks::mutator_lock_);
@@ -215,6 +225,9 @@
   Atomic<bool> allow_accessing_weak_globals_;
   ConditionVariable weak_globals_add_condition_ GUARDED_BY(weak_globals_lock_);
 
+  // TODO Maybe move this to Runtime.
+  std::vector<GetEnvHook> env_hooks_;
+
   DISALLOW_COPY_AND_ASSIGN(JavaVMExt);
 };
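A runtime plugin that wants its own environment type is handed it through the same mechanism: it registers a GetEnvHook and claims its own version constant. An illustrative sketch, with kMyPluginVersion and CreateMyPluginEnv made up for the example:

// Illustrative plugin-side hook: claim one version number, defer everything else.
static jint MyPluginGetEnv(JavaVMExt* vm, /*out*/void** new_env, jint version) {
  constexpr jint kMyPluginVersion = 0x70010200;  // hypothetical constant for this sketch
  if (version != kMyPluginVersion) {
    return JNI_EVERSION;  // not ours; HandleGetEnv moves on to the next hook
  }
  *new_env = CreateMyPluginEnv(vm);  // hypothetical factory owned by the plugin
  return JNI_OK;
}
// Registration, e.g. from the plugin's load path:
//   reinterpret_cast<JavaVMExt*>(vm)->AddEnvironmentHook(MyPluginGetEnv);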
 
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index 51952c4..e9d6d07 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/sockets.h"
 #endif
 
@@ -224,7 +224,7 @@
        */
       int  ret = connect(control_sock_, &control_addr_.controlAddrPlain, control_addr_len_);
       if (!ret) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
         if (!socket_peer_is_trusted(control_sock_)) {
           if (shutdown(control_sock_, SHUT_RDWR)) {
             PLOG(ERROR) << "trouble shutting down socket";
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 668d5dc..dbf04fe 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -251,7 +251,7 @@
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
       break;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     case kJdwpTransportAndroidAdb:
       InitAdbTransport(state.get(), options);
       break;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ae5a0f6..cff2354 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -19,6 +19,7 @@
 #include <dlfcn.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
@@ -59,8 +60,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
-  jit_options->save_profiling_info_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);
+  jit_options->profile_saver_options_ =
+      options.GetOrDefault(RuntimeArgumentMap::ProfileSaverOpts);
 
   jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
   if (jit_options->compile_threshold_ > std::numeric_limits<uint16_t>::max()) {
@@ -144,11 +145,10 @@
              cumulative_timings_("JIT timings"),
              memory_use_("Memory used for compilation", 16),
              lock_("JIT memory use lock"),
-             use_jit_compilation_(true),
-             save_profiling_info_(false) {}
+             use_jit_compilation_(true) {}
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
-  DCHECK(options->UseJitCompilation() || options->GetSaveProfilingInfo());
+  DCHECK(options->UseJitCompilation() || options->GetProfileSaverOptions().IsEnabled());
   std::unique_ptr<Jit> jit(new Jit);
   jit->dump_info_on_shutdown_ = options->DumpJitInfoOnShutdown();
   if (jit_compiler_handle_ == nullptr && !LoadCompiler(error_msg)) {
@@ -163,12 +163,12 @@
     return nullptr;
   }
   jit->use_jit_compilation_ = options->UseJitCompilation();
-  jit->save_profiling_info_ = options->GetSaveProfilingInfo();
+  jit->profile_saver_options_ = options->GetProfileSaverOptions();
   VLOG(jit) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
       << ", compile_threshold=" << options->GetCompileThreshold()
-      << ", save_profiling_info=" << options->GetSaveProfilingInfo();
+      << ", profile_saver_options=" << options->GetProfileSaverOptions();
 
 
   jit->hot_method_threshold_ = options->GetCompileThreshold();
@@ -259,7 +259,7 @@
 
   // If we get a request to compile a proxy method, we pass the actual Java method
   // of that proxy method, as the compiler does not expect a proxy method.
-  ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
     return false;
   }
@@ -310,13 +310,18 @@
                             const std::vector<std::string>& code_paths,
                             const std::string& foreign_dex_profile_path,
                             const std::string& app_dir) {
-  if (save_profiling_info_) {
-    ProfileSaver::Start(filename, code_cache_.get(), code_paths, foreign_dex_profile_path, app_dir);
+  if (profile_saver_options_.IsEnabled()) {
+    ProfileSaver::Start(profile_saver_options_,
+                        filename,
+                        code_cache_.get(),
+                        code_paths,
+                        foreign_dex_profile_path,
+                        app_dir);
   }
 }
 
 void Jit::StopProfileSaver() {
-  if (save_profiling_info_ && ProfileSaver::IsStarted()) {
+  if (profile_saver_options_.IsEnabled() && ProfileSaver::IsStarted()) {
     ProfileSaver::Stop(dump_info_on_shutdown_);
   }
 }
@@ -330,7 +335,7 @@
 }
 
 Jit::~Jit() {
-  DCHECK(!save_profiling_info_ || !ProfileSaver::IsStarted());
+  DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
   }
@@ -406,7 +411,7 @@
 
   // Get the actual Java method if this method is from a proxy class. The compiler
   // and the JIT code cache do not expect methods from proxy classes.
-  method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
 
   // Cheap check if the method has been compiled already. That's an indicator that we should
   // osr into it.
@@ -612,7 +617,7 @@
   int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
   if (starting_count < warm_method_threshold_) {
     if ((new_count >= warm_method_threshold_) &&
-        (method->GetProfilingInfo(sizeof(void*)) == nullptr)) {
+        (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
       bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
       if (success) {
         VLOG(jit) << "Start profiling " << PrettyMethod(method);
@@ -667,7 +672,7 @@
     return;
   }
 
-  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
   // Update the entrypoint if the ProfilingInfo has one. The interpreter will call it
   // instead of interpreting the method.
   if ((profiling_info != nullptr) && (profiling_info->GetSavedEntryPoint() != nullptr)) {
@@ -685,11 +690,8 @@
                                    ArtMethod* callee ATTRIBUTE_UNUSED) {
   ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   DCHECK(this_object != nullptr);
-  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = caller->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
     info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
 }
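The repeated sizeof(void*) → kRuntimePointerSize substitutions in this file (and in the code-cache and profile-saver changes below) swap a raw size_t for the strongly typed PointerSize enum pulled in via base/enums.h. Roughly what that header provides, paraphrased here for context rather than quoted:

#include <cstddef>
// Paraphrase of base/enums.h: a pointer-width enum plus the value for the current build.
enum class PointerSize : size_t { k32 = 4, k64 = 8 };
constexpr PointerSize kRuntimePointerSize =
    sizeof(void*) == 8U ? PointerSize::k64 : PointerSize::k32;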
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index f3a6240..2aa6f3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -24,6 +24,7 @@
 #include "base/timing_logger.h"
 #include "object_callbacks.h"
 #include "offline_profiling_info.h"
+#include "jit/profile_saver_options.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -92,8 +93,8 @@
     return use_jit_compilation_;
   }
 
-  bool SaveProfilingInfo() const {
-    return save_profiling_info_;
+  bool GetSaveProfilingInfo() const {
+    return profile_saver_options_.IsEnabled();
   }
 
   // Wait until there is no more pending compilation tasks.
@@ -189,7 +190,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
 
   bool use_jit_compilation_;
-  bool save_profiling_info_;
+  ProfileSaverOptions profile_saver_options_;
   static bool generate_debug_info_;
   uint16_t hot_method_threshold_;
   uint16_t warm_method_threshold_;
@@ -228,8 +229,11 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  const ProfileSaverOptions& GetProfileSaverOptions() const {
+    return profile_saver_options_;
+  }
   bool GetSaveProfilingInfo() const {
-    return save_profiling_info_;
+    return profile_saver_options_.IsEnabled();
   }
   bool UseJitCompilation() const {
     return use_jit_compilation_;
@@ -237,8 +241,8 @@
   void SetUseJitCompilation(bool b) {
     use_jit_compilation_ = b;
   }
-  void SetSaveProfilingInfo(bool b) {
-    save_profiling_info_ = b;
+  void SetSaveProfilingInfo(bool save_profiling_info) {
+    profile_saver_options_.SetEnabled(save_profiling_info);
   }
   void SetJitAtFirstUse() {
     use_jit_compilation_ = true;
@@ -255,15 +259,14 @@
   uint16_t priority_thread_weight_;
   size_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
-  bool save_profiling_info_;
+  ProfileSaverOptions profile_saver_options_;
 
   JitOptions()
       : use_jit_compilation_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
-        dump_info_on_shutdown_(false),
-        save_profiling_info_(false) { }
+        dump_info_on_shutdown_(false) {}
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
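JitOptions now owns a full ProfileSaverOptions rather than a lone bool, with GetSaveProfilingInfo()/SetSaveProfilingInfo() kept as thin wrappers over it. A short hedged usage sketch of the accessors declared above (DescribeJitOptions is hypothetical):

// Hypothetical helper showing the new accessors; LOG and the stream operator for
// ProfileSaverOptions come from the runtime headers touched elsewhere in this change.
void DescribeJitOptions(const JitOptions* options) {
  const ProfileSaverOptions& saver = options->GetProfileSaverOptions();
  if (options->GetSaveProfilingInfo()) {  // equivalent to saver.IsEnabled()
    LOG(INFO) << "profile saver options: " << saver;
  }
}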
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 8c1d776..b1079dd 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -742,13 +743,14 @@
         // a method has compiled code but no ProfilingInfo.
         // We make sure compiled methods have a ProfilingInfo object. It is needed for
         // code cache collection.
-        if (ContainsPc(ptr) && info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+        if (ContainsPc(ptr) &&
+            info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
           // We clear the inline caches as classes in it might be stalled.
           info->ClearGcRootsInInlineCaches();
           // Do a fence to make sure the clearing is seen before attaching to the method.
           QuasiAtomic::ThreadFenceRelease();
           info->GetMethod()->SetProfilingInfo(info);
-        } else if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) != info) {
+        } else if (info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) != info) {
           // No need for this ProfilingInfo object anymore.
           FreeData(reinterpret_cast<uint8_t*>(info));
           return true;
@@ -766,7 +768,7 @@
   // have memory leaks of compiled code otherwise.
   for (const auto& it : method_code_map_) {
     ArtMethod* method = it.second;
-    if (method->GetProfilingInfo(sizeof(void*)) == nullptr) {
+    if (method->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
       const void* code_ptr = it.first;
       const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
@@ -855,7 +857,7 @@
       sizeof(void*));
 
   // Check whether some other thread has concurrently created it.
-  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
     return info;
   }
@@ -923,7 +925,7 @@
     return false;
   }
 
-  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info == nullptr) {
     VLOG(jit) << PrettyMethod(method) << " needs a ProfilingInfo to be compiled";
     // Because the counter is not atomic, there are some rare cases where we may not
@@ -943,7 +945,7 @@
 
 ProfilingInfo* JitCodeCache::NotifyCompilerUse(ArtMethod* method, Thread* self) {
   MutexLock mu(self, lock_);
-  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
     info->IncrementInlineUse();
   }
@@ -952,13 +954,13 @@
 
 void JitCodeCache::DoneCompilerUse(ArtMethod* method, Thread* self) {
   MutexLock mu(self, lock_);
-  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   DCHECK(info != nullptr);
   info->DecrementInlineUse();
 }
 
 void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) {
-  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   DCHECK(info->IsMethodBeingCompiled(osr));
   info->SetIsMethodBeingCompiled(false, osr);
 }
@@ -970,7 +972,7 @@
 
 void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
                                              const OatQuickMethodHeader* header) {
-  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
   if ((profiling_info != nullptr) &&
       (profiling_info->GetSavedEntryPoint() == header->GetEntryPoint())) {
     // Prevent future uses of the compiled code.
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 6dc1578..1938221 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -146,7 +146,6 @@
   // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index cdbff19..aa606a2 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -19,6 +19,7 @@
 #include "errno.h"
 #include <limits.h>
 #include <vector>
+#include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -40,6 +41,11 @@
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
+// Debug flag to ignore checksums when testing if a method or a class is present in the profile.
+// Used to facilitate testing profile guided compilation across a large number of apps
+// using the same test profile.
+static constexpr bool kDebugIgnoreChecksum = false;
+
 // Transform the actual dex location into relative paths.
 // Note: this is OK because we don't store profiles of different apps into the same file.
 // Apps with split apks don't cause trouble because each split has a different name and will not
@@ -406,7 +412,8 @@
   line_header->checksum = header_buffer.ReadUintAndAdvance<uint32_t>();
 
   if (dex_location_size == 0 || dex_location_size > kMaxDexFileKeyLength) {
-    *error = "DexFileKey has an invalid size: " + std::to_string(dex_location_size);
+    *error = "DexFileKey has an invalid size: " +
+        std::to_string(static_cast<uint32_t>(dex_location_size));
     return kProfileLoadBadData;
   }
 
@@ -546,10 +553,14 @@
   return true;
 }
 
+static bool ChecksumMatch(const DexFile& dex_file, uint32_t checksum) {
+  return kDebugIgnoreChecksum || dex_file.GetLocationChecksum() == checksum;
+}
+
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
   auto info_it = info_.find(GetProfileDexFileKey(method_ref.dex_file->GetLocation()));
   if (info_it != info_.end()) {
-    if (method_ref.dex_file->GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(*method_ref.dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& methods = info_it->second.method_set;
@@ -561,7 +572,7 @@
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
   auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
   if (info_it != info_.end()) {
-    if (dex_file.GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& classes = info_it->second.class_set;
@@ -658,4 +669,47 @@
   }
 }
 
+// Naive implementation to generate a random profile file suitable for testing.
+bool ProfileCompilationInfo::GenerateTestProfile(int fd,
+                                                 uint16_t number_of_dex_files,
+                                                 uint16_t method_ratio,
+                                                 uint16_t class_ratio) {
+  const std::string base_dex_location = "base.apk";
+  ProfileCompilationInfo info;
+  // The limits are defined by the dex specification.
+  uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  uint16_t number_of_methods = max_method * method_ratio / 100;
+  uint16_t number_of_classes = max_classes * class_ratio / 100;
+
+  srand(MicroTime());
+
+  // Make sure we generate more samples with a low index value.
+  // This makes it more likely to hit valid method/class indices in small apps.
+  const uint16_t kFavorFirstN = 10000;
+  const uint16_t kFavorSplit = 2;
+
+  for (uint16_t i = 0; i < number_of_dex_files; i++) {
+    std::string dex_location = DexFile::GetMultiDexLocation(i, base_dex_location.c_str());
+    std::string profile_key = GetProfileDexFileKey(dex_location);
+
+    for (uint16_t m = 0; m < number_of_methods; m++) {
+      uint16_t method_idx = rand() % max_method;
+      if (m < (number_of_methods / kFavorSplit)) {
+        method_idx %= kFavorFirstN;
+      }
+      info.AddMethodIndex(profile_key, 0, method_idx);
+    }
+
+    for (uint16_t c = 0; c < number_of_classes; c++) {
+      uint16_t class_idx = rand() % max_classes;
+      if (c < (number_of_classes / kFavorSplit)) {
+        class_idx %= kFavorFirstN;
+      }
+      info.AddClassIndex(profile_key, 0, class_idx);
+    }
+  }
+  return info.Save(fd);
+}
+
 }  // namespace art
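GenerateTestProfile writes a synthetic profile straight to an open file descriptor. A hedged example of driving it from a standalone tool or test (the callers that actually use it are outside these hunks; WriteRandomProfile is illustrative):

#include <fcntl.h>
#include <unistd.h>
// Requires "jit/offline_profiling_info.h" from the tree above for the declaration.
bool WriteRandomProfile(const char* path) {
  int fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
  if (fd < 0) {
    return false;
  }
  // Two dex files; mark ~10% of possible method indices and ~5% of class indices.
  bool ok = art::ProfileCompilationInfo::GenerateTestProfile(
      fd, /*number_of_dex_files*/ 2, /*method_ratio*/ 10, /*class_ratio*/ 5);
  close(fd);
  return ok;
}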
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 5a07da7..0b26f9b 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -87,6 +87,11 @@
   // Clears the resolved classes from the current object.
   void ClearResolvedClasses();
 
+  static bool GenerateTestProfile(int fd,
+                                  uint16_t number_of_dex_files,
+                                  uint16_t method_ratio,
+                                  uint16_t class_ratio);
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadIOError,
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 794ea27..927681c 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -21,6 +21,7 @@
 #include <fcntl.h>
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
 #include "compiler_filter.h"
@@ -30,25 +31,11 @@
 
 namespace art {
 
-// TODO: read the constants from ProfileOptions,
-// Add a random delay each time we go to sleep so that we don't hammer the CPU
-// with all profile savers running at the same time.
-static constexpr const uint64_t kMinSavePeriodNs = MsToNs(20 * 1000);  // 20 seconds
-static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
-// Minimum number of JIT samples during launch to include a method into the profile.
-static constexpr const size_t kStartupMethodSamples = 1;
-
-static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
-static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
-static constexpr const uint32_t kMinimumNumberOfNotificationBeforeWake =
-    kMinimumNumberOfMethodsToSave;
-static constexpr const uint32_t kMaximumNumberOfNotificationBeforeWake = 50;
-
-
 ProfileSaver* ProfileSaver::instance_ = nullptr;
 pthread_t ProfileSaver::profiler_pthread_ = 0U;
 
-ProfileSaver::ProfileSaver(const std::string& output_filename,
+ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
+                           const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
                            const std::vector<std::string>& code_paths,
                            const std::string& foreign_dex_profile_path,
@@ -72,7 +59,9 @@
       total_number_of_foreign_dex_marks_(0),
       max_number_of_profile_entries_cached_(0),
       total_number_of_hot_spikes_(0),
-      total_number_of_wake_ups_(0) {
+      total_number_of_wake_ups_(0),
+      options_(options) {
+  DCHECK(options_.IsEnabled());
   AddTrackedLocations(output_filename, app_data_dir, code_paths);
 }
 
@@ -80,14 +69,13 @@
   Thread* self = Thread::Current();
 
   // Fetch the resolved classes for the app images after sleeping for
-  // kSaveResolvedClassesDelayMs.
+  // options_.GetSaveResolvedClassesDelayMs().
   // TODO(calin) This only considers the case of the primary profile file.
   // Anything that gets loaded in the same VM will not have their resolved
   // classes save (unless they started before the initial saving was done).
   {
     MutexLock mu(self, wait_lock_);
-    constexpr uint64_t kSleepTime = kSaveResolvedClassesDelayMs;
-    const uint64_t end_time = NanoTime() + MsToNs(kSleepTime);
+    const uint64_t end_time = NanoTime() + MsToNs(options_.GetSaveResolvedClassesDelayMs());
     while (true) {
       const uint64_t current_time = NanoTime();
       if (current_time >= end_time) {
@@ -95,7 +83,7 @@
       }
       period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
     }
-    total_ms_of_sleep_ += kSaveResolvedClassesDelayMs;
+    total_ms_of_sleep_ += options_.GetSaveResolvedClassesDelayMs();
   }
   FetchAndCacheResolvedClassesAndMethods();
 
@@ -117,10 +105,11 @@
       // We might have been woken up by a huge number of notifications to guarantee saving.
       // If we didn't meet the minimum saving period go back to sleep (only if missed by
       // a reasonable margin).
-      while (kMinSavePeriodNs * 0.9 > sleep_time) {
+      uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs());
+      while (min_save_period_ns * 0.9 > sleep_time) {
         {
           MutexLock mu(self, wait_lock_);
-          period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
+          period_condition_.TimedWait(self, NsToMs(min_save_period_ns - sleep_time), 0);
           sleep_time = NanoTime() - sleep_start;
         }
         // Check if the thread was woken up for shutdown.
@@ -170,18 +159,17 @@
   jit_activity_notifications_++;
   // Note that we are not as precise as we could be here but we don't want to wake the saver
   // every time we see a hot method.
-  if (jit_activity_notifications_ > kMinimumNumberOfNotificationBeforeWake) {
+  if (jit_activity_notifications_ > options_.GetMinNotificationBeforeWake()) {
     MutexLock wait_mutex(Thread::Current(), wait_lock_);
-    if ((NanoTime() - last_time_ns_saver_woke_up_) > kMinSavePeriodNs) {
+    if ((NanoTime() - last_time_ns_saver_woke_up_) > MsToNs(options_.GetMinSavePeriodMs())) {
+      WakeUpSaver();
+    } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) {
+      // Make sure to wake up the saver if we see a spike in the number of notifications.
+      // This is a precaution to avoid losing a big number of methods in case
+      // this is a spike with no jit after.
+      total_number_of_hot_spikes_++;
       WakeUpSaver();
     }
-  } else if (jit_activity_notifications_ > kMaximumNumberOfNotificationBeforeWake) {
-    // Make sure to wake up the saver if we see a spike in the number of notifications.
-    // This is a precaution to avoid "loosing" a big number of methods in case
-    // this is a spike with no jit after.
-    total_number_of_hot_spikes_++;
-    MutexLock wait_mutex(Thread::Current(), wait_lock_);
-    WakeUpSaver();
   }
 }
 
@@ -197,18 +185,21 @@
 // Excludes native methods and classes in the boot image.
 class GetMethodsVisitor : public ClassVisitor {
  public:
-  explicit GetMethodsVisitor(std::vector<MethodReference>* methods) : methods_(methods) {}
+  GetMethodsVisitor(std::vector<MethodReference>* methods, uint32_t startup_method_samples)
+    : methods_(methods),
+      startup_method_samples_(startup_method_samples) {}
 
   virtual bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
     if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
       return true;
     }
-    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
       if (!method.IsNative()) {
-        if (method.GetCounter() >= kStartupMethodSamples ||
-            method.GetProfilingInfo(sizeof(void*)) != nullptr) {
+        if (method.GetCounter() >= startup_method_samples_ ||
+            method.GetProfilingInfo(kRuntimePointerSize) != nullptr) {
           // Have samples, add to profile.
-          const DexFile* dex_file = method.GetInterfaceMethodIfProxy(sizeof(void*))->GetDexFile();
+          const DexFile* dex_file =
+              method.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetDexFile();
           methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
         }
       }
@@ -218,6 +209,7 @@
 
  private:
   std::vector<MethodReference>* const methods_;
+  uint32_t startup_method_samples_;
 };
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
@@ -229,11 +221,11 @@
   std::vector<MethodReference> methods;
   {
     ScopedTrace trace2("Get hot methods");
-    GetMethodsVisitor visitor(&methods);
+    GetMethodsVisitor visitor(&methods, options_.GetStartupMethodSamples());
     ScopedObjectAccess soa(Thread::Current());
     class_linker->VisitClasses(&visitor);
     VLOG(profiler) << "Methods with samples greater than "
-                   << kStartupMethodSamples << " = " << methods.size();
+                   << options_.GetStartupMethodSamples() << " = " << methods.size();
   }
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
@@ -302,11 +294,11 @@
         cached_info->GetNumberOfResolvedClasses() -
         static_cast<int64_t>(last_save_number_of_classes_);
 
-    if (delta_number_of_methods < kMinimumNumberOfMethodsToSave &&
-        delta_number_of_classes < kMinimumNumberOfClassesToSave) {
+    if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
+        delta_number_of_classes < options_.GetMinClassesToSave()) {
       VLOG(profiler) << "Not enough information to save to: " << filename
-          << " Nr of methods: " << delta_number_of_methods
-          << " Nr of classes: " << delta_number_of_classes;
+          << " Number of methods: " << delta_number_of_methods
+          << " Number of classes: " << delta_number_of_classes;
       total_number_of_skipped_writes_++;
       continue;
     }
@@ -385,12 +377,14 @@
   return true;
 }
 
-void ProfileSaver::Start(const std::string& output_filename,
+void ProfileSaver::Start(const ProfileSaverOptions& options,
+                         const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
                          const std::vector<std::string>& code_paths,
                          const std::string& foreign_dex_profile_path,
                          const std::string& app_data_dir) {
-  DCHECK(Runtime::Current()->SaveProfileInfo());
+  DCHECK(options.IsEnabled());
+  DCHECK(Runtime::Current()->GetJit() != nullptr);
   DCHECK(!output_filename.empty());
   DCHECK(jit_code_cache != nullptr);
 
@@ -420,7 +414,8 @@
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
       << ". Tracking: " << Join(code_paths_to_profile, ':');
 
-  instance_ = new ProfileSaver(output_filename,
+  instance_ = new ProfileSaver(options,
+                               output_filename,
                                jit_code_cache,
                                code_paths_to_profile,
                                foreign_dex_profile_path,
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 9c6d0fa..59e2c94 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -20,6 +20,7 @@
 #include "base/mutex.h"
 #include "jit_code_cache.h"
 #include "offline_profiling_info.h"
+#include "profile_saver_options.h"
 #include "safe_map.h"
 
 namespace art {
@@ -28,7 +29,8 @@
  public:
   // Starts the profile saver thread if not already started.
   // If the saver is already running it adds (output_filename, code_paths) to its tracked locations.
-  static void Start(const std::string& output_filename,
+  static void Start(const ProfileSaverOptions& options,
+                    const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
                     const std::vector<std::string>& code_paths,
                     const std::string& foreign_dex_profile_path,
@@ -61,7 +63,8 @@
                             uint16_t method_idx);
 
  private:
-  ProfileSaver(const std::string& output_filename,
+  ProfileSaver(const ProfileSaverOptions& options,
+               const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
                const std::vector<std::string>& code_paths,
                const std::string& foreign_dex_profile_path,
@@ -155,6 +158,7 @@
   uint64_t total_number_of_hot_spikes_;
   uint64_t total_number_of_wake_ups_;
 
+  const ProfileSaverOptions options_;
   DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
 };
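ProfileSaver::Start now takes the options struct as its first argument; Jit::StartProfileSaver in the jit.cc hunk above is the real call site. A minimal hedged sketch of the call shape, with the output path purely illustrative:

// Sketch of a caller; in the runtime the JIT passes its own profile_saver_options_.
void StartSaverForApp(jit::JitCodeCache* code_cache,
                      const std::vector<std::string>& code_paths,
                      const std::string& foreign_dex_profile_path,
                      const std::string& app_data_dir) {
  ProfileSaverOptions options;
  options.SetEnabled(true);  // Start() DCHECKs that the options are enabled
  ProfileSaver::Start(options,
                      "/data/misc/profiles/cur/0/com.example/primary.prof",  // illustrative path
                      code_cache,
                      code_paths,
                      foreign_dex_profile_path,
                      app_data_dir);
}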
 
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
new file mode 100644
index 0000000..a6385d7
--- /dev/null
+++ b/runtime/jit/profile_saver_options.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+#define ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+
+#include <string>
+#include <ostream>
+
+namespace art {
+
+struct ProfileSaverOptions {
+ public:
+  static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
+  static constexpr uint32_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+  // Minimum number of JIT samples during launch to include a method into the profile.
+  static constexpr uint32_t kStartupMethodSamples = 1;
+  static constexpr uint32_t kMinMethodsToSave = 10;
+  static constexpr uint32_t kMinClassesToSave = 10;
+  static constexpr uint32_t kMinNotificationBeforeWake = 10;
+  static constexpr uint32_t kMaxNotificationBeforeWake = 50;
+
+  ProfileSaverOptions() :
+    enabled_(false),
+    min_save_period_ms_(kMinSavePeriodMs),
+    save_resolved_classes_delay_ms_(kSaveResolvedClassesDelayMs),
+    startup_method_samples_(kStartupMethodSamples),
+    min_methods_to_save_(kMinMethodsToSave),
+    min_classes_to_save_(kMinClassesToSave),
+    min_notification_before_wake_(kMinNotificationBeforeWake),
+    max_notification_before_wake_(kMaxNotificationBeforeWake) {}
+
+  ProfileSaverOptions(
+      bool enabled,
+      uint32_t min_save_period_ms,
+      uint32_t save_resolved_classes_delay_ms,
+      uint32_t startup_method_samples,
+      uint32_t min_methods_to_save,
+      uint32_t min_classes_to_save,
+      uint32_t min_notification_before_wake,
+      uint32_t max_notification_before_wake):
+    enabled_(enabled),
+    min_save_period_ms_(min_save_period_ms),
+    save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms),
+    startup_method_samples_(startup_method_samples),
+    min_methods_to_save_(min_methods_to_save),
+    min_classes_to_save_(min_classes_to_save),
+    min_notification_before_wake_(min_notification_before_wake),
+    max_notification_before_wake_(max_notification_before_wake) {}
+
+  bool IsEnabled() const {
+    return enabled_;
+  }
+  void SetEnabled(bool enabled) {
+    enabled_ = enabled;
+  }
+
+  uint32_t GetMinSavePeriodMs() const {
+    return min_save_period_ms_;
+  }
+  uint32_t GetSaveResolvedClassesDelayMs() const {
+    return save_resolved_classes_delay_ms_;
+  }
+  uint32_t GetStartupMethodSamples() const {
+    return startup_method_samples_;
+  }
+  uint32_t GetMinMethodsToSave() const {
+    return min_methods_to_save_;
+  }
+  uint32_t GetMinClassesToSave() const {
+    return min_classes_to_save_;
+  }
+  uint32_t GetMinNotificationBeforeWake() const {
+    return min_notification_before_wake_;
+  }
+  uint32_t GetMaxNotificationBeforeWake() const {
+    return max_notification_before_wake_;
+  }
+
+  friend std::ostream & operator<<(std::ostream &os, const ProfileSaverOptions& pso) {
+    os << "enabled_" << pso.enabled_
+        << ", min_save_period_ms_" << pso.min_save_period_ms_
+        << ", save_resolved_classes_delay_ms_" << pso.save_resolved_classes_delay_ms_
+        << ", startup_method_samples_" << pso.startup_method_samples_
+        << ", min_methods_to_save_" << pso.min_methods_to_save_
+        << ", min_classes_to_save_" << pso.min_classes_to_save_
+        << ", min_notification_before_wake_" << pso.min_notification_before_wake_
+        << ", max_notification_before_wake_" << pso.max_notification_before_wake_;
+    return os;
+  }
+
+  bool enabled_;
+  uint32_t min_save_period_ms_;
+  uint32_t save_resolved_classes_delay_ms_;
+  uint32_t startup_method_samples_;
+  uint32_t min_methods_to_save_;
+  uint32_t min_classes_to_save_;
+  uint32_t min_notification_before_wake_;
+  uint32_t max_notification_before_wake_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
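[Editor's note, not part of the patch] A minimal usage sketch for the new ProfileSaverOptions header above. It assumes only that runtime/jit/ is on the include path (as for the other runtime headers); it shows the default-constructed state, the explicit enable step that ProfileSaver::Start() now DCHECKs via options.IsEnabled(), and the stream operator declared in the header:

#include <iostream>

#include "jit/profile_saver_options.h"

int main() {
  art::ProfileSaverOptions options;   // defaults: disabled, kMinSavePeriodMs, kMinMethodsToSave, ...
  options.SetEnabled(true);           // Start() requires IsEnabled() to be true (see the DCHECK above)
  std::cout << options << "\n";       // prints every field via the operator<< defined in the header
  return 0;
}
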
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 07c8051..216df2f 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,10 +25,33 @@
 
 namespace art {
 
+ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method),
+        is_method_being_compiled_(false),
+        is_osr_method_being_compiled_(false),
+        current_inline_uses_(0),
+        saved_entry_point_(nullptr) {
+  memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    cache_[i].dex_pc_ = entries[i];
+  }
+  if (method->IsCopied()) {
+    // GetHoldingClassOfCopiedMethod is expensive, but creating a profiling info for a copied method
+    // appears to happen very rarely in practice.
+    holding_class_ = GcRoot<mirror::Class>(
+        Runtime::Current()->GetClassLinker()->GetHoldingClassOfCopiedMethod(method));
+  } else {
+    holding_class_ = GcRoot<mirror::Class>(method->GetDeclaringClass());
+  }
+  DCHECK(!holding_class_.IsNull());
+}
+
 bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
   DCHECK(!method->IsNative());
+
   const DexFile::CodeItem& code_item = *method->GetCodeItem();
   const uint16_t* code_ptr = code_item.insns_;
   const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
@@ -93,6 +116,14 @@
         --i;
       } else {
         // We successfully set `cls`, just return.
+        // Since the instrumentation is marked from the declaring class we need to mark the card so
+        // that mod-union tables and card rescanning know about the update.
+        // Note that the declaring class is not necessarily the holding class if the method is
+        // copied. We need the card mark to be in the holding class since that is from where we
+        // will visit the profiling info.
+        if (!holding_class_.IsNull()) {
+          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(holding_class_.Read());
+        }
         return;
       }
     }
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d04d2de..a890fbb 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -105,6 +105,7 @@
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
   void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    visitor.VisitRootIfNonNull(holding_class_.AddressWithoutBarrier());
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       InlineCache* cache = &cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
@@ -166,18 +167,7 @@
   }
 
  private:
-  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()),
-        method_(method),
-        is_method_being_compiled_(false),
-        is_osr_method_being_compiled_(false),
-        current_inline_uses_(0),
-        saved_entry_point_(nullptr) {
-    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
-    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc_ = entries[i];
-    }
-  }
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
 
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
@@ -185,6 +175,9 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Holding class for the method in case method is a copied method.
+  GcRoot<mirror::Class> holding_class_;
+
   // Whether the ArtMethod is currently being compiled. This flag
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index 1ee1611..40efc89 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -45,6 +45,20 @@
   return in->locals.IsValid();
 }
 
+jint JNIEnvExt::GetEnvHandler(JavaVMExt* vm, /*out*/void** env, jint version) {
+  UNUSED(vm);
+  // GetEnv always returns a JNIEnv* for the most current supported JNI version,
+  // and unlike other calls that take a JNI version doesn't care if you supply
+  // JNI_VERSION_1_1, which we don't otherwise support.
+  if (JavaVMExt::IsBadJniVersion(version) && version != JNI_VERSION_1_1) {
+    return JNI_EVERSION;
+  }
+  Thread* thread = Thread::Current();
+  CHECK(thread != nullptr);
+  *env = thread->GetJniEnv();
+  return JNI_OK;
+}
+
 JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
   std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
   if (CheckLocalsValid(ret.get())) {
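[Editor's note, not part of the patch] For context on the new JNIEnvExt::GetEnvHandler above, this is the standard caller-side JNI pattern it ultimately services; a minimal sketch using only the stock JavaVM::GetEnv API from <jni.h>:

#include <jni.h>

// Ask the VM for the current thread's JNIEnv*. Per the comment in GetEnvHandler,
// a recognized version (and, as a special case, JNI_VERSION_1_1) is accepted,
// while other bad versions yield JNI_EVERSION.
JNIEnv* GetEnvForCurrentThread(JavaVM* vm) {
  void* env = nullptr;
  jint result = vm->GetEnv(&env, JNI_VERSION_1_6);
  return (result == JNI_OK) ? static_cast<JNIEnv*>(env) : nullptr;
}
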
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index d4accc3..ac287d4 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -54,6 +54,8 @@
   static Offset LocalRefCookieOffset(size_t pointer_size);
   static Offset SelfOffset(size_t pointer_size);
 
+  static jint GetEnvHandler(JavaVMExt* vm, /*out*/void** out, jint version);
+
   jobject NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 7bd85ec..c322475 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -27,6 +27,7 @@
 #include "art_method-inl.h"
 #include "atomic.h"
 #include "base/allocator.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -301,13 +302,13 @@
     CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, return_val)
 
 #define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value, return_val) \
-  if (UNLIKELY(value == nullptr)) { \
+  if (UNLIKELY((value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(name, #value " == null"); \
     return return_val; \
   }
 
 #define CHECK_NON_NULL_MEMCPY_ARGUMENT(length, value) \
-  if (UNLIKELY(length != 0 && value == nullptr)) { \
+  if (UNLIKELY((length) != 0 && (value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(__FUNCTION__, #value " == null"); \
     return; \
   }
@@ -375,10 +376,12 @@
     ScopedObjectAccess soa(env);
     ArtMethod* m = soa.DecodeMethod(mid);
     mirror::AbstractMethod* method;
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
     if (m->IsConstructor()) {
-      method = mirror::Constructor::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Constructor::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), m);
     } else {
-      method = mirror::Method::CreateFromArtMethod(soa.Self(), m);
+      method = mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), m);
     }
     return soa.AddLocalReference<jobject>(method);
   }
@@ -387,7 +390,8 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ArtField* f = soa.DecodeField(fid);
-    return soa.AddLocalReference<jobject>(mirror::Field::CreateFromArtField(soa.Self(), f, true));
+    return soa.AddLocalReference<jobject>(
+        mirror::Field::CreateFromArtField<kRuntimePointerSize>(soa.Self(), f, true));
   }
 
   static jclass GetObjectClass(JNIEnv* env, jobject java_object) {
@@ -2201,6 +2205,7 @@
 
       VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
 
+      is_fast = is_fast || m->IsFastNative();  // Merge with @FastNative state.
       m->RegisterNative(fnPtr, is_fast);
     }
     return JNI_OK;
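[Editor's note, not part of the patch] The extra parentheses added to the CHECK_NON_NULL_ARGUMENT_FN_NAME and CHECK_NON_NULL_MEMCPY_ARGUMENT macros above are standard macro hygiene: without them, an argument containing a lower-precedence operator binds to only part of the comparison. A standalone illustration in generic C++ (not ART code):

#include <cassert>

#define IS_NULL_UNSAFE(value) (value == nullptr)    // comparison can bind to part of `value`
#define IS_NULL_SAFE(value)   ((value) == nullptr)  // always tests the whole argument

int main() {
  int x = 1;
  int* a = &x;
  int* b = nullptr;
  bool cond = true;
  // IS_NULL_UNSAFE(cond ? a : b) would expand to (cond ? a : b == nullptr), which
  // groups as cond ? a : (b == nullptr) and no longer means "is the chosen pointer null".
  assert(!IS_NULL_SAFE(cond ? a : b));  // expands to ((cond ? a : b) == nullptr); a is non-null
  return 0;
}
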
diff --git a/runtime/lambda/art_lambda_method.cc b/runtime/lambda/art_lambda_method.cc
deleted file mode 100644
index 6f9f8bb..0000000
--- a/runtime/lambda/art_lambda_method.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/art_lambda_method.h"
-
-#include "base/logging.h"
-#include "lambda/shorty_field_type.h"
-
-namespace art {
-namespace lambda {
-
-ArtLambdaMethod::ArtLambdaMethod(ArtMethod* target_method,
-                                 const char* captured_variables_type_descriptor,
-                                 const char* captured_variables_shorty,
-                                 bool innate_lambda)
-    : method_(target_method),
-      captured_variables_type_descriptor_(captured_variables_type_descriptor),
-      captured_variables_shorty_(captured_variables_shorty),
-      innate_lambda_(innate_lambda) {
-  DCHECK(target_method != nullptr);
-  DCHECK(captured_variables_type_descriptor != nullptr);
-  DCHECK(captured_variables_shorty != nullptr);
-
-  // Calculate the static closure size from the captured variables.
-  size_t size = sizeof(ArtLambdaMethod*);  // Initial size is just this method.
-  bool static_size = true;
-  const char* shorty = captured_variables_shorty_;
-  while (shorty != nullptr && *shorty != '\0') {
-    // Each captured variable also appends to the size.
-    ShortyFieldType shorty_field{*shorty};  // NOLINT [readability/braces] [4]
-    size += shorty_field.GetStaticSize();
-    static_size &= shorty_field.IsStaticSize();
-    ++shorty;
-  }
-  closure_size_ = size;
-
-  // We determine whether or not the size is dynamic by checking for nested lambdas.
-  //
-  // This is conservative, since in theory an optimization could determine the size
-  // of the nested lambdas recursively. In practice it's probably better to flatten out
-  // nested lambdas and inline all their code if they are known statically.
-  dynamic_size_ = !static_size;
-
-  if (kIsDebugBuild) {
-    // Double check that the number of captured variables match in both strings.
-    size_t shorty_count = strlen(captured_variables_shorty);
-
-    size_t long_count = 0;
-    const char* long_type = captured_variables_type_descriptor;
-    ShortyFieldType out;
-    while ((long_type = ShortyFieldType::ParseFromFieldTypeDescriptor(long_type, &out))
-           != nullptr) {
-      ++long_count;
-    }
-
-    DCHECK_EQ(shorty_count, long_count)
-        << "number of captured variables in long type '" << captured_variables_type_descriptor
-        << "' (" << long_count << ")" << " did not match short type '"
-        << captured_variables_shorty << "' (" << shorty_count << ")";
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
deleted file mode 100644
index ea13eb7..0000000
--- a/runtime/lambda/art_lambda_method.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-#define ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
-
-#include "base/macros.h"
-#include "art_method.h"
-
-#include <stdint.h>
-
-namespace art {
-namespace lambda {
-
-class ArtLambdaMethod {
- public:
-  // Construct an art lambda method.
-  // The target method is the one invoked by invoke-lambda.
-  // The type descriptor describes the types of variables captured, e.g. "ZFLObject;\FI;[Z"
-  // The shorty drops the object name and treats arrays as objects, e.g. "ZFL\L"
-  // Innate lambda means that the lambda was originally created via invoke-lambda.
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  // (Ownership of strings is retained by the caller and the lifetime should exceed this class).
-  ArtLambdaMethod(ArtMethod* target_method,
-                  const char* captured_variables_type_descriptor,
-                  const char* captured_variables_shorty,
-                  bool innate_lambda = true);
-
-  // Get the target method for this lambda that would be used by the invoke-lambda dex instruction.
-  ArtMethod* GetArtMethod() const {
-    return method_;
-  }
-
-  // Get the compile-time size of lambda closures for this method in bytes.
-  // This is circular (that is, it includes the size of the ArtLambdaMethod pointer).
-  // One should also check if the size is dynamic since nested lambdas have a runtime size.
-  size_t GetStaticClosureSize() const {
-    return closure_size_;
-  }
-
-  // Get the type descriptor for the list of captured variables.
-  // e.g. "ZFLObject;\FI;[Z" means a captured int, float, class Object, lambda FI, array of ints
-  const char* GetCapturedVariablesTypeDescriptor() const {
-    return captured_variables_type_descriptor_;
-  }
-
-  // Get the shorty 'field' type descriptor list of captured variables.
-  // This follows the same rules as a string of ShortyFieldType in the dex specification.
-  // Every captured variable is represented by exactly one character.
-  // - Objects become 'L'.
-  // - Arrays become 'L'.
-  // - Lambdas become '\'.
-  const char* GetCapturedVariablesShortyTypeDescriptor() const {
-    return captured_variables_shorty_;
-  }
-
-  // Will the size of this lambda change at runtime?
-  // Only returns true if there is a nested lambda that we can't determine statically the size of.
-  bool IsDynamicSize() const {
-    return dynamic_size_;
-  }
-
-  // Will the size of this lambda always be constant at runtime?
-  // This generally means there's no nested lambdas, or we were able to successfully determine
-  // their size statically at compile time.
-  bool IsStaticSize() const {
-    return !IsDynamicSize();
-  }
-  // Is this a lambda that was originally created via invoke-lambda?
-  // -- Non-innate lambdas (learned lambdas) come from a regular class that was boxed to lambda.
-  bool IsInnateLambda() const {
-    return innate_lambda_;
-  }
-
-  // How many variables were captured?
-  // (Each nested lambda counts as 1 captured var regardless of how many captures it itself has).
-  size_t GetNumberOfCapturedVariables() const {
-    return strlen(captured_variables_shorty_);
-  }
-
- private:
-  // TODO: ArtMethod, or at least the entry points should be inlined into this struct
-  // to avoid an extra indirect load when doing invokes.
-  // Target method that invoke-lambda will jump to.
-  ArtMethod* method_;
-  // How big the closure is (in bytes). Only includes the constant size.
-  size_t closure_size_;
-  // The type descriptor for the captured variables, e.g. "IS" for [int, short]
-  const char* captured_variables_type_descriptor_;
-  // The shorty type descriptor for captured vars, (e.g. using 'L' instead of 'LObject;')
-  const char* captured_variables_shorty_;
-  // Whether or not the size is dynamic. If it is, copiers need to read the Closure size at runtime.
-  bool dynamic_size_;
-  // True if this lambda was originally made with create-lambda,
-  // false if it came from a class instance (through new-instance and then unbox-lambda).
-  bool innate_lambda_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArtLambdaMethod);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_ART_LAMBDA_METHOD_H_
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
deleted file mode 100644
index 9918bb7..0000000
--- a/runtime/lambda/box_table.cc
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/box_table.h"
-
-#include "base/mutex.h"
-#include "common_throws.h"
-#include "gc_root-inl.h"
-#include "lambda/closure.h"
-#include "lambda/leaking_allocator.h"
-#include "mirror/method.h"
-#include "mirror/object-inl.h"
-#include "thread.h"
-
-#include <vector>
-
-namespace art {
-namespace lambda {
-// Temporarily represent the lambda Closure as its raw bytes in an array.
-// TODO: Generate a proxy class for the closure when boxing the first time.
-using BoxedClosurePointerType = mirror::ByteArray*;
-
-static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-  return mirror::ByteArray::GetArrayClass();
-}
-
-namespace {
-  // Convenience functions to allocating/deleting box table copies of the closures.
-  struct ClosureAllocator {
-    // Deletes a Closure that was allocated through ::Allocate.
-    static void Delete(Closure* ptr) {
-      delete[] reinterpret_cast<char*>(ptr);
-    }
-
-    // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap.
-    static Closure* Allocate(size_t size) {
-      DCHECK_GE(size, sizeof(Closure));
-
-      // TODO: Maybe point to the interior of the boxed closure object after we add proxy support?
-      Closure* closure = reinterpret_cast<Closure*>(new char[size]);
-      DCHECK_ALIGNED(closure, alignof(Closure));
-      return closure;
-    }
-  };
-}  // namespace
-
-BoxTable::BoxTable()
-  : allow_new_weaks_(true),
-    new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
-
-BoxTable::~BoxTable() {
-  // Free all the copies of our closures.
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    Closure* closure = key_value_pair.first;
-
-    // Remove from the map first, so that it doesn't try to access dangling pointer.
-    map_iterator = map_.Erase(map_iterator);
-
-    // Safe to delete, no dangling pointers.
-    ClosureAllocator::Delete(closure);
-  }
-}
-
-mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
-  Thread* self = Thread::Current();
-
-  {
-    // TODO: Switch to ReaderMutexLock if ConditionVariable ever supports RW Mutexes
-    /*Reader*/MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Attempt to look up this object, it's possible it was already boxed previously.
-    // If this is the case we *must* return the same object as before to maintain
-    // referential equality.
-    //
-    // In managed code:
-    //   Functional f = () -> 5;  // vF = create-lambda
-    //   Object a = f;            // vA = box-lambda vA
-    //   Object b = f;            // vB = box-lambda vB
-    //   assert(a == f)
-    ValueType value = FindBoxedLambda(closure);
-    if (!value.IsNull()) {
-      return value.Read();
-    }
-
-    // Otherwise we need to box ourselves and insert it into the hash map
-  }
-
-  // Release the lambda table lock here, so that thread suspension is allowed.
-
-  // Convert the Closure into a managed byte[] which will serve
-  // as the temporary 'boxed' version of the lambda. This is good enough
-  // to check all the basic object identities that a boxed lambda must retain.
-  // It's also good enough to contain all the captured primitive variables.
-
-  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
-  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  BoxedClosurePointerType closure_as_array_object =
-      mirror::ByteArray::Alloc(self, closure->GetSize());
-
-  // There are no thread suspension points after this, so we don't need to put it into a handle.
-
-  if (UNLIKELY(closure_as_array_object == nullptr)) {
-    // Most likely an OOM has occurred.
-    CHECK(self->IsExceptionPending());
-    return nullptr;
-  }
-
-  // Write the raw closure data into the byte[].
-  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
-                                                      0 /*index*/),     // index
-                  closure_as_array_object->GetLength());
-
-  // The method has been successfully boxed into an object, now insert it into the hash map.
-  {
-    MutexLock mu(self, *Locks::lambda_table_lock_);
-    BlockUntilWeaksAllowed();
-
-    // Lookup the object again, it's possible another thread already boxed it while
-    // we were allocating the object before.
-    ValueType value = FindBoxedLambda(closure);
-    if (UNLIKELY(!value.IsNull())) {
-      // Let the GC clean up method_as_object at a later time.
-      return value.Read();
-    }
-
-    // Otherwise we need to insert it into the hash map in this thread.
-
-    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
-    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
-    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
-    closure->CopyTo(closure_table_copy, closure->GetSize());
-
-    // The closure_table_copy needs to be deleted by us manually when we erase it from the map.
-
-    // Actually insert into the table.
-    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
-  }
-
-  return closure_as_array_object;
-}
-
-bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
-  DCHECK(object != nullptr);
-  *out_closure = nullptr;
-
-  Thread* self = Thread::Current();
-
-  // Note that we do not need to access lambda_table_lock_ here
-  // since we don't need to look at the map.
-
-  mirror::Object* boxed_closure_object = object;
-
-  // Raise ClassCastException if object is not instanceof byte[]
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
-    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
-    return false;
-  }
-
-  // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a byte[].
-
-  // If we got this far, the inputs are valid.
-  // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it.
-  BoxedClosurePointerType boxed_closure_as_array =
-      down_cast<BoxedClosurePointerType>(boxed_closure_object);
-
-  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
-
-  // Allocate a copy that can "escape" and copy the closure data into that.
-  Closure* unboxed_closure =
-      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
-  // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
-  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
-
-  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
-
-  *out_closure = unboxed_closure;
-  return true;
-}
-
-BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
-  auto map_iterator = map_.Find(closure);
-  if (map_iterator != map_.end()) {
-    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-    const ValueType& value = key_value_pair.second;
-
-    DCHECK(!value.IsNull());  // Never store null boxes.
-    return value;
-  }
-
-  return ValueType(nullptr);
-}
-
-void BoxTable::BlockUntilWeaksAllowed() {
-  Thread* self = Thread::Current();
-  while (UNLIKELY((!kUseReadBarrier && !allow_new_weaks_) ||
-                  (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    new_weaks_condition_.WaitHoldingLocks(self);  // wait while holding mutator lock
-  }
-}
-
-void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) {
-  DCHECK(visitor != nullptr);
-
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  /*
-   * Visit every weak root in our lambda box table.
-   * Remove unmarked objects, update marked objects to new address.
-   */
-  std::vector<ClosureType> remove_list;
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
-
-    const ValueType& old_value = key_value_pair.second;
-
-    // This does not need a read barrier because this is called by GC.
-    mirror::Object* old_value_raw = old_value.Read<kWithoutReadBarrier>();
-    mirror::Object* new_value = visitor->IsMarked(old_value_raw);
-
-    if (new_value == nullptr) {
-      // The object has been swept away.
-      const ClosureType& closure = key_value_pair.first;
-
-      // Delete the entry from the map.
-      map_iterator = map_.Erase(map_iterator);
-
-      // Clean up the memory by deleting the closure.
-      ClosureAllocator::Delete(closure);
-
-    } else {
-      // The object has been moved.
-      // Update the map.
-      key_value_pair.second = ValueType(new_value);
-      ++map_iterator;
-    }
-  }
-
-  // Occasionally shrink the map to avoid growing very large.
-  if (map_.CalculateLoadFactor() < kMinimumLoadFactor) {
-    map_.ShrinkToMaximumLoad();
-  }
-}
-
-void BoxTable::DisallowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = false;
-}
-
-void BoxTable::AllowNewWeakBoxedLambdas() {
-  CHECK(!kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-
-  allow_new_weaks_ = true;
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::BroadcastForNewWeakBoxedLambdas() {
-  CHECK(kUseReadBarrier);
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::lambda_table_lock_);
-  new_weaks_condition_.Broadcast(self);
-}
-
-void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  item.first = nullptr;
-
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  item.second = ValueType();  // Also clear the GC root.
-}
-
-bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
-  return item.first == nullptr;
-}
-
-bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
-                                    const UnorderedMapKeyType& rhs) const {
-  // Nothing needs this right now, but leave this assertion for later when
-  // we need to look at the references inside of the closure.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-
-  return lhs->ReferenceEquals(rhs);
-}
-
-size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
-  const lambda::Closure* closure = key;
-  DCHECK_ALIGNED(closure, alignof(lambda::Closure));
-
-  // Need to hold mutator_lock_ before calling into Closure::GetHashCode.
-  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  return closure->GetHashCode();
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
deleted file mode 100644
index adb7332..0000000
--- a/runtime/lambda/box_table.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-#define ART_RUNTIME_LAMBDA_BOX_TABLE_H_
-
-#include "base/allocator.h"
-#include "base/hash_map.h"
-#include "gc_root.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "object_callbacks.h"
-
-#include <stdint.h>
-
-namespace art {
-
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-struct Closure;  // forward declaration
-
-/*
- * Store a table of boxed lambdas. This is required to maintain object referential equality
- * when a lambda is re-boxed.
- *
- * Conceptually, we store a mapping of Closures -> Weak Reference<Boxed Lambda Object>.
- * When too many objects get GCd, we shrink the underlying table to use less space.
- */
-class BoxTable FINAL {
- public:
-  using ClosureType = art::lambda::Closure*;
-
-  // Boxes a closure into an object. Returns null and throws an exception on failure.
-  mirror::Object* BoxLambda(const ClosureType& closure)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
-  bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Sweep weak references to lambda boxes. Update the addresses if the objects have been
-  // moved, and delete them from the table if the objects have been cleaned up.
-  void SweepWeakBoxedLambdas(IsMarkedVisitor* visitor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Temporarily block anyone from touching the map.
-  void DisallowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void AllowNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  // GC callback: Unblock any readers who have been queued waiting to touch the map.
-  void BroadcastForNewWeakBoxedLambdas()
-      REQUIRES(!Locks::lambda_table_lock_);
-
-  BoxTable();
-  ~BoxTable();
-
- private:
-  // Explanation:
-  // - After all threads are suspended (exclusive mutator lock),
-  //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
-  // If an object is moved at that time and *before* SweepSystemWeaks are called then
-  // we don't know if the move has happened yet.
-  // Successive reads will then (incorrectly) look at the objects in the "from" space,
-  // which is a problem since the objects have been already forwarded and mutations
-  // would not be visible in the right space.
-  // Instead, use a GcRoot here which will be automatically updated by the GC.
-  //
-  // Also, any reads should be protected by a read barrier to always give us the "to" space address.
-  using ValueType = GcRoot<mirror::Object>;
-
-  // Attempt to look up the lambda in the map, or return null if it's not there yet.
-  ValueType FindBoxedLambda(const ClosureType& closure) const
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // If the GC has come in and temporarily disallowed touching weaks, block until is it allowed.
-  void BlockUntilWeaksAllowed()
-      SHARED_REQUIRES(Locks::lambda_table_lock_);
-
-  // Wrap the Closure into a unique_ptr so that the HashMap can delete its memory automatically.
-  using UnorderedMapKeyType = ClosureType;
-
-  // EmptyFn implementation for art::HashMap
-  struct EmptyFn {
-    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-
-    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
-  };
-
-  // HashFn implementation for art::HashMap
-  struct HashFn {
-    size_t operator()(const UnorderedMapKeyType& key) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  // EqualsFn implementation for art::HashMap
-  struct EqualsFn {
-    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
-        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
-  };
-
-  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
-                                    ValueType,
-                                    EmptyFn,
-                                    HashFn,
-                                    EqualsFn,
-                                    TrackingAllocator<std::pair<ClosureType, ValueType>,
-                                                      kAllocatorTagLambdaBoxTable>>;
-
-  UnorderedMap map_                                          GUARDED_BY(Locks::lambda_table_lock_);
-  bool allow_new_weaks_                                      GUARDED_BY(Locks::lambda_table_lock_);
-  ConditionVariable new_weaks_condition_                     GUARDED_BY(Locks::lambda_table_lock_);
-
-  // Shrink the map when we get below this load factor.
-  // (This is an arbitrary value that should be large enough to prevent aggressive map erases
-  // from shrinking the table too often.)
-  static constexpr double kMinimumLoadFactor = UnorderedMap::kDefaultMinLoadFactor / 2;
-
-  DISALLOW_COPY_AND_ASSIGN(BoxTable);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_BOX_TABLE_H_
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
deleted file mode 100644
index 179e4ee..0000000
--- a/runtime/lambda/closure.cc
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/closure.h"
-
-#include "base/logging.h"
-#include "lambda/art_lambda_method.h"
-#include "runtime/mirror/object_reference.h"
-
-static constexpr const bool kClosureSupportsReferences = false;
-static constexpr const bool kClosureSupportsGarbageCollection = false;
-
-namespace art {
-namespace lambda {
-
-template <typename T>
-// TODO: can I return T __attribute__((__aligned__(1)))* here instead?
-const uint8_t* Closure::GetUnsafeAtOffset(size_t offset) const {
-  // Do not DCHECK here with existing helpers since most of them will call into this function.
-  return reinterpret_cast<const uint8_t*>(captured_) + offset;
-}
-
-size_t Closure::GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const {
-  switch (variable_type) {
-    case ShortyFieldType::kLambda:
-    {
-      return GetClosureSize(GetUnsafeAtOffset<Closure>(offset));
-    }
-    default:
-      DCHECK(variable_type.IsStaticSize());
-      return variable_type.GetStaticSize();
-  }
-}
-
-// Templatize the flags to give the compiler a fighting chance to eliminate
-// any unnecessary code through different uses of this function.
-template <Closure::VariableInfo::Flags flags>
-inline Closure::VariableInfo Closure::ParseTypeDescriptor(const char* type_descriptor,
-                                                          size_t upto_index) const {
-  DCHECK(type_descriptor != nullptr);
-
-  VariableInfo result;
-
-  ShortyFieldType last_type;
-  size_t offset = (flags & VariableInfo::kOffset) ? GetStartingOffset() : 0;
-  size_t prev_offset = 0;
-  size_t count = 0;
-
-  while ((type_descriptor =
-      ShortyFieldType::ParseFromFieldTypeDescriptor(type_descriptor, &last_type)) != nullptr) {
-    count++;
-
-    if (flags & VariableInfo::kOffset) {
-      // Accumulate the sizes of all preceding captured variables as the current offset only.
-      offset += prev_offset;
-      prev_offset = GetCapturedVariableSize(last_type, offset);
-    }
-
-    if ((count > upto_index)) {
-      break;
-    }
-  }
-
-  if (flags & VariableInfo::kVariableType) {
-    result.variable_type_ = last_type;
-  }
-
-  if (flags & VariableInfo::kIndex) {
-    result.index_ = count;
-  }
-
-  if (flags & VariableInfo::kCount) {
-    result.count_ = count;
-  }
-
-  if (flags & VariableInfo::kOffset) {
-    result.offset_ = offset;
-  }
-
-  // TODO: We should probably store the result of this in the ArtLambdaMethod,
-  // to avoid re-computing the data every single time for static closures.
-  return result;
-}
-
-size_t Closure::GetCapturedVariablesSize() const {
-  const size_t captured_variable_offset = offsetof(Closure, captured_);
-  DCHECK_GE(GetSize(), captured_variable_offset);  // Prevent underflows.
-  return GetSize() - captured_variable_offset;
-}
-
-size_t Closure::GetSize() const {
-  const size_t static_closure_size = lambda_info_->GetStaticClosureSize();
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return static_closure_size;
-  }
-
-  DCHECK_GE(static_closure_size, sizeof(captured_[0].dynamic_.size_));
-  const size_t dynamic_closure_size = captured_[0].dynamic_.size_;
-  // The dynamic size better be at least as big as the static size.
-  DCHECK_GE(dynamic_closure_size, static_closure_size);
-
-  return dynamic_closure_size;
-}
-
-void Closure::CopyTo(void* target, size_t target_size) const {
-  DCHECK_GE(target_size, GetSize());
-
-  // TODO: using memcpy is unsafe with read barriers, fix this once we add reference support
-  static_assert(kClosureSupportsReferences == false,
-                "Do not use memcpy with readbarrier references");
-  memcpy(target, this, GetSize());
-}
-
-ArtMethod* Closure::GetTargetMethod() const {
-  return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
-}
-
-uint32_t Closure::GetHashCode() const {
-  // Start with a non-zero constant, a prime number.
-  uint32_t result = 17;
-
-  // Include the hash with the ArtMethod.
-  {
-    uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod());
-    result = 31 * result + Low32Bits(method);
-    if (sizeof(method) == sizeof(uint64_t)) {
-      result = 31 * result + High32Bits(method);
-    }
-  }
-
-  // Include a hash for each captured variable.
-  for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) {
-    // TODO: not safe for GC-able values since the address can move and the hash code would change.
-    uint8_t captured_variable_raw_value;
-    CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value);  // NOLINT: [whitespace/comma] [3]
-
-    result = 31 * result + captured_variable_raw_value;
-  }
-
-  // TODO: Fix above loop to work for objects and lambdas.
-  static_assert(kClosureSupportsGarbageCollection == false,
-               "Need to update above loop to read the hash code from the "
-                "objects and lambdas recursively");
-
-  return result;
-}
-
-bool Closure::ReferenceEquals(const Closure* other) const {
-  DCHECK(other != nullptr);
-
-  // TODO: Need rework to use read barriers once closures have references inside of them that can
-  // move. Until then, it's safe to just compare the data inside of it directly.
-  static_assert(kClosureSupportsReferences == false,
-                "Unsafe to use memcmp in read barrier collector");
-
-  if (GetSize() != other->GetSize()) {
-    return false;
-  }
-
-  return memcmp(this, other, GetSize());
-}
-
-size_t Closure::GetNumberOfCapturedVariables() const {
-  // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kCount>(GetCapturedVariablesTypeDescriptor(),
-                                                VariableInfo::kUpToIndexMax);
-  size_t count = variable_info.count_;
-  // Assuming each variable was 1 byte, the size should always be greater or equal than the count.
-  DCHECK_LE(count, GetCapturedVariablesSize());
-  return count;
-}
-
-const char* Closure::GetCapturedVariablesTypeDescriptor() const {
-  return lambda_info_->GetCapturedVariablesTypeDescriptor();
-}
-
-ShortyFieldType Closure::GetCapturedShortyType(size_t index) const {
-  DCHECK_LT(index, GetNumberOfCapturedVariables());
-
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kVariableType>(GetCapturedVariablesTypeDescriptor(),
-                                                       index);
-
-  return variable_info.variable_type_;
-}
-
-uint32_t Closure::GetCapturedPrimitiveNarrow(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveNarrow());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint32_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveNarrowType<decltype(result)>(),
-                "result must be a primitive narrow type");
-  switch (variable_type) {
-    case ShortyFieldType::kBoolean:
-      CopyUnsafeAtOffset<bool>(offset, &result);
-      break;
-    case ShortyFieldType::kByte:
-      CopyUnsafeAtOffset<uint8_t>(offset, &result);
-      break;
-    case ShortyFieldType::kChar:
-      CopyUnsafeAtOffset<uint16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kShort:
-      CopyUnsafeAtOffset<int16_t>(offset, &result);
-      break;
-    case ShortyFieldType::kInt:
-      CopyUnsafeAtOffset<int32_t>(offset, &result);
-      break;
-    case ShortyFieldType::kFloat:
-      // XX: Maybe there should just be a GetCapturedPrimitive<T> to avoid this shuffle?
-      // The interpreter's invoke seems to only special case references and wides,
-      // everything else is treated as a generic 32-bit pattern.
-      CopyUnsafeAtOffset<float>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid narrow primitive shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-uint64_t Closure::GetCapturedPrimitiveWide(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsPrimitiveWide());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  uint64_t result = 0;
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveWideType<decltype(result)>(),
-                "result must be a primitive wide type");
-  switch (variable_type) {
-    case ShortyFieldType::kLong:
-      CopyUnsafeAtOffset<int64_t>(offset, &result);
-      break;
-    case ShortyFieldType::kDouble:
-      CopyUnsafeAtOffset<double>(offset, &result);
-      break;
-    default:
-      LOG(FATAL)
-          << "expected a valid primitive wide shorty type but got "
-          << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-mirror::Object* Closure::GetCapturedObject(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsObject());
-
-  ShortyFieldType variable_type;
-  size_t offset;
-  GetCapturedVariableTypeAndOffset(index, &variable_type, &offset);
-
-  // TODO: Restructure to use template specialization, e.g. GetCapturedPrimitive<T>
-  // so that we can avoid this nonsense regarding memcpy always overflowing.
-  // Plus, this additional switching seems redundant since the interpreter
-  // would've done it already, and knows the exact type.
-  mirror::Object* result = nullptr;
-  static_assert(ShortyFieldTypeTraits::IsObjectType<decltype(result)>(),
-                "result must be an object type");
-  switch (variable_type) {
-    case ShortyFieldType::kObject:
-      // TODO: This seems unsafe. This may need to use gcroots.
-      static_assert(kClosureSupportsGarbageCollection == false,
-                    "May need GcRoots and definitely need mutator locks");
-      {
-        mirror::CompressedReference<mirror::Object> compressed_result;
-        CopyUnsafeAtOffset<uint32_t>(offset, &compressed_result);
-        result = compressed_result.AsMirrorPtr();
-      }
-      break;
-    default:
-      CHECK(false)
-          << "expected a valid shorty type but got " << static_cast<char>(variable_type);
-      UNREACHABLE();
-  }
-
-  return result;
-}
-
-size_t Closure::GetCapturedClosureSize(size_t index) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  return closure_size;
-}
-
-void Closure::CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const {
-  DCHECK(GetCapturedShortyType(index).IsLambda());
-  size_t offset = GetCapturedVariableOffset(index);
-
-  auto* captured_ptr = reinterpret_cast<const uint8_t*>(&captured_);
-  size_t closure_size = GetClosureSize(captured_ptr + offset);
-
-  static_assert(ShortyFieldTypeTraits::IsLambdaType<Closure*>(),
-                "result must be a lambda type");
-
-  CopyUnsafeAtOffset<Closure>(offset, destination, closure_size, destination_room);
-}
-
-size_t Closure::GetCapturedVariableOffset(size_t index) const {
-  VariableInfo variable_info =
-      ParseTypeDescriptor<VariableInfo::kOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                 index);
-
-  size_t offset = variable_info.offset_;
-
-  return offset;
-}
-
-void Closure::GetCapturedVariableTypeAndOffset(size_t index,
-                                               ShortyFieldType* out_type,
-                                               size_t* out_offset) const {
-  DCHECK(out_type != nullptr);
-  DCHECK(out_offset != nullptr);
-
-  static constexpr const VariableInfo::Flags kVariableTypeAndOffset =
-      static_cast<VariableInfo::Flags>(VariableInfo::kVariableType | VariableInfo::kOffset);
-  VariableInfo variable_info =
-      ParseTypeDescriptor<kVariableTypeAndOffset>(GetCapturedVariablesTypeDescriptor(),
-                                                  index);
-
-  ShortyFieldType variable_type = variable_info.variable_type_;
-  size_t offset = variable_info.offset_;
-
-  *out_type = variable_type;
-  *out_offset = offset;
-}
-
-template <typename T>
-void Closure::CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size,
-                                 size_t destination_room) const {
-  DCHECK_GE(destination_room, src_size);
-  const uint8_t* data_ptr = GetUnsafeAtOffset<T>(offset);
-  memcpy(destination, data_ptr, sizeof(T));
-}
-
-// TODO: This is kind of ugly. I would prefer an unaligned_ptr<Closure> here.
-// Unfortunately C++ doesn't let you lower the alignment (i.e. alignas(1) Closure*) is not legal.
-size_t Closure::GetClosureSize(const uint8_t* closure) {
-  DCHECK(closure != nullptr);
-
-  static_assert(!std::is_base_of<mirror::Object, Closure>::value,
-                "It might be unsafe to call memcpy on a managed object");
-
-  // Safe as long as it's not a mirror Object.
-  // TODO: Should probably wrap this in like MemCpyNative or some such which statically asserts
-  // we aren't trying to copy mirror::Object data around.
-  ArtLambdaMethod* closure_info;
-  memcpy(&closure_info, closure + offsetof(Closure, lambda_info_), sizeof(closure_info));
-
-  if (LIKELY(closure_info->IsStaticSize())) {
-    return closure_info->GetStaticClosureSize();
-  }
-
-  // The size is dynamic, so we need to read it from captured_variables_ portion.
-  size_t dynamic_size;
-  memcpy(&dynamic_size,
-         closure + offsetof(Closure, captured_[0].dynamic_.size_),
-         sizeof(dynamic_size));
-  static_assert(sizeof(dynamic_size) == sizeof(captured_[0].dynamic_.size_),
-                "Dynamic size type must match the structural type of the size");
-
-  DCHECK_GE(dynamic_size, closure_info->GetStaticClosureSize());
-  return dynamic_size;
-}
-
-size_t Closure::GetStartingOffset() const {
-  static constexpr const size_t captured_offset = offsetof(Closure, captured_);
-  if (LIKELY(lambda_info_->IsStaticSize())) {
-    return offsetof(Closure, captured_[0].static_variables_) - captured_offset;
-  } else {
-    return offsetof(Closure, captured_[0].dynamic_.variables_) - captured_offset;
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
deleted file mode 100644
index 31ff194..0000000
--- a/runtime/lambda/closure.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-class ClosureBuilder;   // forward declaration
-
-// Inline representation of a lambda closure.
-// Contains the target method and the set of packed captured variables as a copy.
-//
-// The closure itself is logically immutable, although in practice any object references
-// it (recursively) contains can be moved and updated by the GC.
-struct PACKED(sizeof(ArtLambdaMethod*)) Closure {
-  // Get the size of the Closure in bytes.
-  // This is necessary in order to allocate a large enough area to copy the Closure into.
-  // Do *not* copy the closure with memcpy, since references also need to get moved.
-  size_t GetSize() const;
-
-  // Copy this closure into the target, whose memory size is specified by target_size.
-  // Any object references are fixed up during the copy (if there was a read barrier).
-  // The target_size must be at least as large as GetSize().
-  void CopyTo(void* target, size_t target_size) const;
-
-  // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
-  ArtMethod* GetTargetMethod() const;
-
-  // Calculates the hash code. Value is recomputed each time.
-  uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Is this the same closure as other? e.g. same target method, same variables captured.
-  //
-  // Determines whether the two Closures are interchangeable instances.
-  // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that
-  // means that they are interchangeable values (usually for the purpose of boxing/unboxing).
-  bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // How many variables were captured?
-  size_t GetNumberOfCapturedVariables() const;
-
-  // Returns a type descriptor string that represents each captured variable.
-  // e.g. "Ljava/lang/Object;ZB" would mean a capture tuple of (Object, boolean, byte)
-  const char* GetCapturedVariablesTypeDescriptor() const;
-
-  // Returns the short type for the captured variable at index.
-  // Index must be less than the number of captured variables.
-  ShortyFieldType GetCapturedShortyType(size_t index) const;
-
-  // Returns the 32-bit representation of a non-wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint32_t GetCapturedPrimitiveNarrow(size_t index) const;
-  // Returns the 64-bit representation of a wide primitive at the captured variable index.
-  // Smaller types are zero extended.
-  // Index must be less than the number of captured variables.
-  uint64_t GetCapturedPrimitiveWide(size_t index) const;
-  // Returns the object reference at the captured variable index.
-  // The type at the index *must* be an object reference or a CHECK failure will occur.
-  // Index must be less than the number of captured variables.
-  mirror::Object* GetCapturedObject(size_t index) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Gets the size of a nested capture closure in bytes, at the captured variable index.
-  // The type at the index *must* be a lambda closure or a CHECK failure will occur.
-  size_t GetCapturedClosureSize(size_t index) const;
-
-  // Copies a nested lambda closure at the captured variable index.
-  // The destination must have enough room for the closure (see GetCapturedClosureSize).
-  void CopyCapturedClosure(size_t index, void* destination, size_t destination_room) const;
-
- private:
-  // Read out any non-lambda value as a copy.
-  template <typename T>
-  T GetCapturedVariable(size_t index) const;
-
-  // Reconstruct the closure's captured variable info at runtime.
-  struct VariableInfo {
-    size_t index_;
-    ShortyFieldType variable_type_;
-    size_t offset_;
-    size_t count_;
-
-    enum Flags {
-      kIndex = 0x1,
-      kVariableType = 0x2,
-      kOffset = 0x4,
-      kCount = 0x8,
-    };
-
-    // Traverse to the end of the type descriptor list instead of stopping at some particular index.
-    static constexpr size_t kUpToIndexMax = static_cast<size_t>(-1);
-  };
-
-  // Parse a type descriptor, stopping at index "upto_index".
-  // Returns only the information requested in flags. All other fields are indeterminate.
-  template <VariableInfo::Flags flags>
-  inline VariableInfo ALWAYS_INLINE ParseTypeDescriptor(const char* type_descriptor,
-                                                        size_t upto_index) const;
-
-  // Convenience function to call ParseTypeDescriptor with just the type and offset.
-  void GetCapturedVariableTypeAndOffset(size_t index,
-                                        ShortyFieldType* out_type,
-                                        size_t* out_offset) const;
-
-  // How many bytes do the captured variables take up? Runtime sizeof(captured_variables).
-  size_t GetCapturedVariablesSize() const;
-  // Get the size in bytes of the variable_type which is potentially stored at offset.
-  size_t GetCapturedVariableSize(ShortyFieldType variable_type, size_t offset) const;
-  // Get the starting offset (in bytes) for the 0th captured variable.
-  // All offsets are relative to 'captured_'.
-  size_t GetStartingOffset() const;
-  // Get the offset for this index.
-  // All offsets are relative to 'captured_'.
-  size_t GetCapturedVariableOffset(size_t index) const;
-
-  // Cast the data at '(char*)captured_[offset]' into T, returning its address.
-  // This value should not be dereferenced directly since it is unaligned.
-  template <typename T>
-  inline const uint8_t* GetUnsafeAtOffset(size_t offset) const;
-
-  // Copy the data at the offset into the destination. DCHECKs that
-  // the destination_room is large enough (in bytes) to fit the data.
-  template <typename T>
-  inline void CopyUnsafeAtOffset(size_t offset,
-                                 void* destination,
-                                 size_t src_size = sizeof(T),
-                                 size_t destination_room = sizeof(T)) const;
-
-  // Get the closure size from an unaligned (i.e. interior) closure pointer.
-  static size_t GetClosureSize(const uint8_t* closure);
-
-  ///////////////////////////////////////////////////////////////////////////////////
-
-  // Compile-time known lambda information such as the type descriptor and size.
-  ArtLambdaMethod* lambda_info_;
-
-  // A contiguous list of captured variables, and possibly the closure size.
-  // The runtime size can always be determined through GetSize().
-  union {
-    // Read from here if the closure size is static (ArtLambdaMethod::IsStatic)
-    uint8_t static_variables_[0];
-    struct {
-      // Read from here if the closure size is dynamic (ArtLambdaMethod::IsDynamic)
-      size_t size_;  // The lambda_info_ and the size_ itself are also included as part of the size.
-      uint8_t variables_[0];
-    } dynamic_;
-  } captured_[0];
-  // captured_ will always consist of one array element at runtime.
-  // Set to [0] so that 'size_' is not counted in sizeof(Closure).
-
-  friend class ClosureBuilder;
-  friend class ClosureTest;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_H_
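// A hypothetical read-back sketch for the Closure accessors declared above.
// Assumes a closure whose captured-variable descriptor is "IJ" (one int
// followed by one long); SumCapturedIJ is a made-up helper name.
uint64_t SumCapturedIJ(const art::lambda::Closure* closure) {
  DCHECK_EQ(2u, closure->GetNumberOfCapturedVariables());
  uint32_t i = closure->GetCapturedPrimitiveNarrow(0);  // 'I': zero-extended to 32 bits.
  uint64_t j = closure->GetCapturedPrimitiveWide(1);    // 'J': full 64-bit value.
  return i + j;
}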
diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h
deleted file mode 100644
index 3cec21f..0000000
--- a/runtime/lambda/closure_builder-inl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
-
-#include "lambda/closure_builder.h"
-#include <string.h>
-
-namespace art {
-namespace lambda {
-
-template <typename T, ClosureBuilder::ShortyTypeEnum kShortyType>
-void ClosureBuilder::CaptureVariablePrimitive(T value) {
-  static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a primitive type");
-  const size_t type_size = ShortyFieldType(kShortyType).GetStaticSize();
-  DCHECK_EQ(type_size, sizeof(T));
-
-  // Copy the data while retaining the bit pattern. Strict-aliasing safe.
-  ShortyFieldTypeTraits::MaxType value_storage = 0;
-  memcpy(&value_storage, &value, sizeof(T));
-
-  values_.push_back(value_storage);
-  size_ += sizeof(T);
-
-  shorty_types_ += kShortyType;
-}
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_INL_H_
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
deleted file mode 100644
index 739e965..0000000
--- a/runtime/lambda/closure_builder.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "lambda/closure_builder.h"
-
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/shorty_field_type.h"
-#include "runtime/mirror/object_reference.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-namespace lambda {
-
-/*
- * GC support TODOs:
- * (Although there's some code for storing objects, it is UNIMPLEMENTED(FATAL) because it is
- * incomplete).
- *
- * 1) GC needs to be able to traverse the Closure and visit any references.
- *    It might be possible to get away with global roots in the short term.
- *
- * 2) Add Brooks read barrier support. We can store the black/gray/white bits
- *    in the lower 2 bits of the lambda art method pointer. Whenever a closure is copied
- *    [to the stack] we'd need to add a cold path to turn it black.
- *    (since there are only 3 colors, we can use the 4th value to indicate no-refs).
- *    e.g. 0x0 = gray, 0x1 = white, 0x2 = black, 0x3 = no-nested-references
- *    - Alternatively the GC can mark reference-less closures as always-black,
- *      although it would need extra work to check for references.
- */
-
-void ClosureBuilder::CaptureVariableObject(mirror::Object* object) {
-  auto compressed_reference = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(object);
-  ShortyFieldTypeTraits::MaxType storage = 0;
-
-  static_assert(sizeof(storage) >= sizeof(compressed_reference),
-                "not enough room to store a compressed reference");
-  memcpy(&storage, &compressed_reference, sizeof(compressed_reference));
-
-  values_.push_back(storage);
-  size_ += kObjectReferenceSize;
-
-  static_assert(kObjectReferenceSize == sizeof(compressed_reference), "reference size mismatch");
-
-  // TODO: needs more work to support concurrent GC
-  if (kIsDebugBuild) {
-    if (kUseReadBarrier) {
-      UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier";
-    }
-  }
-
-  shorty_types_ += ShortyFieldType::kObject;
-}
-
-void ClosureBuilder::CaptureVariableLambda(Closure* closure) {
-  DCHECK(closure != nullptr);  // null closures not allowed, target method must be null instead.
-  values_.push_back(reinterpret_cast<ShortyFieldTypeTraits::MaxType>(closure));
-
-  if (LIKELY(is_dynamic_size_ == false)) {
-    // Write in the extra bytes to store the dynamic size the first time.
-    is_dynamic_size_ = true;
-    size_ += sizeof(Closure::captured_[0].dynamic_.size_);
-  }
-
-  // A closure may be sized dynamically, so always query it for the true size.
-  size_ += closure->GetSize();
-
-  shorty_types_ += ShortyFieldType::kLambda;
-}
-
-size_t ClosureBuilder::GetSize() const {
-  return size_;
-}
-
-size_t ClosureBuilder::GetCaptureCount() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return values_.size();
-}
-
-const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const {
-  DCHECK_EQ(values_.size(), shorty_types_.size());
-  return shorty_types_;
-}
-
-Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const {
-  DCHECK(memory != nullptr);
-  DCHECK(target_method != nullptr);
-  DCHECK_EQ(is_dynamic_size_, target_method->IsDynamicSize());
-
-  CHECK_EQ(target_method->GetNumberOfCapturedVariables(), values_.size())
-    << "number of variables captured at runtime does not match "
-    << "number of variables captured at compile time";
-
-  Closure* closure = new (memory) Closure;
-  closure->lambda_info_ = target_method;
-
-  static_assert(offsetof(Closure, captured_) == kInitialSize, "wrong initial size");
-
-  size_t written_size;
-  if (UNLIKELY(is_dynamic_size_)) {
-    // The closure size must be set dynamically (i.e. nested lambdas).
-    closure->captured_[0].dynamic_.size_ = GetSize();
-    size_t header_size = offsetof(Closure, captured_[0].dynamic_.variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].dynamic_.variables_,
-                    header_size,
-                    variables_size);
-  } else {
-    // The closure size is known statically (i.e. no nested lambdas).
-    DCHECK(GetSize() == target_method->GetStaticClosureSize());
-    size_t header_size = offsetof(Closure, captured_[0].static_variables_);
-    DCHECK_LE(header_size, GetSize());
-    size_t variables_size = GetSize() - header_size;
-    written_size =
-        WriteValues(target_method,
-                    closure->captured_[0].static_variables_,
-                    header_size,
-                    variables_size);
-  }
-
-  DCHECK_EQ(written_size, closure->GetSize());
-
-  return closure;
-}
-
-size_t ClosureBuilder::WriteValues(ArtLambdaMethod* target_method,
-                                   uint8_t variables[],
-                                   size_t header_size,
-                                   size_t variables_size) const {
-  size_t total_size = header_size;
-  const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor();
-  DCHECK_STREQ(shorty_types, shorty_types_.c_str());
-
-  size_t variables_offset = 0;
-  size_t remaining_size = variables_size;
-
-  const size_t shorty_count = target_method->GetNumberOfCapturedVariables();
-  DCHECK_EQ(shorty_count, GetCaptureCount());
-
-  for (size_t i = 0; i < shorty_count; ++i) {
-    ShortyFieldType shorty{shorty_types[i]};  // NOLINT [readability/braces] [4]
-
-    size_t var_size;
-    if (LIKELY(shorty.IsStaticSize())) {
-      // TODO: needs more work to support concurrent GC, e.g. read barriers
-      if (kUseReadBarrier == false) {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "writing objects not yet supported, no GC support";
-        }
-      } else {
-        if (UNLIKELY(shorty.IsObject())) {
-          UNIMPLEMENTED(FATAL) << "can't yet safely write objects with read barrier";
-        }
-      }
-
-      var_size = shorty.GetStaticSize();
-      DCHECK_LE(var_size, sizeof(values_[i]));
-
-      // Safe even for objects (non-read barrier case) if we never suspend
-      // while the ClosureBuilder is live.
-      // FIXME: Need to add GC support for references in a closure.
-      memcpy(&variables[variables_offset], &values_[i], var_size);
-    } else {
-      DCHECK(shorty.IsLambda())
-          << " don't support writing dynamically sized types other than lambda";
-
-      ShortyFieldTypeTraits::MaxType closure_raw = values_[i];
-      Closure* nested_closure = reinterpret_cast<Closure*>(closure_raw);
-
-      DCHECK(nested_closure != nullptr);
-      nested_closure->CopyTo(&variables[variables_offset], remaining_size);
-
-      var_size = nested_closure->GetSize();
-    }
-
-    total_size += var_size;
-    DCHECK_GE(remaining_size, var_size);
-    remaining_size -= var_size;
-
-    variables_offset += var_size;
-  }
-
-  DCHECK_EQ('\0', shorty_types[shorty_count]);
-  DCHECK_EQ(variables_offset, variables_size);
-
-  return total_size;
-}
-
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h
deleted file mode 100644
index 23eb484..0000000
--- a/runtime/lambda/closure_builder.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-#define ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
-
-#include "base/macros.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
-#include "base/value_object.h"
-#include "lambda/shorty_field_type.h"
-
-#include <stdint.h>
-#include <vector>
-
-namespace art {
-class ArtMethod;  // forward declaration
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-class ArtLambdaMethod;  // forward declaration
-
-// Build a closure by capturing variables one at a time.
-// When all variables have been marked captured, the closure can be created in-place into
-// a target memory address.
-//
-// The mutator lock must be held for the duration of the lifetime of this object,
-// since it needs to temporarily store heap references into an internal list.
-class ClosureBuilder {
- public:
-  using ShortyTypeEnum = decltype(ShortyFieldType::kByte);
-
-  // Mark this primitive value to be captured as the specified type.
-  template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value>
-  void CaptureVariablePrimitive(T value);
-
-  // Mark this object reference to be captured.
-  void CaptureVariableObject(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Mark this lambda closure to be captured.
-  void CaptureVariableLambda(Closure* closure);
-
-  // Get the size (in bytes) of the closure.
-  // This size is needed to allocate memory large enough to write the closure into.
-  // Call 'CreateInPlace' to actually write the closure out.
-  size_t GetSize() const;
-
-  // Returns how many variables have been captured so far.
-  size_t GetCaptureCount() const;
-
-  // Get the list of captured variables' shorty field types.
-  const std::string& GetCapturedVariableShortyTypes() const;
-
-  // Creates a closure in-place and writes out the data into 'memory'.
-  // Memory must be at least 'GetSize' bytes large.
-  // All of the data previously marked for capture is written out.
-  Closure* CreateInPlace(void* memory, ArtLambdaMethod* target_method) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
-  // Locks need to be held for entire lifetime of ClosureBuilder.
-  ~ClosureBuilder() SHARED_REQUIRES(Locks::mutator_lock_)
-  {}
-
- private:
-  // Initial size of a closure before any variables are written.
-  // Header size only.
-  static constexpr size_t kInitialSize = sizeof(ArtLambdaMethod*);
-
-  // Write a Closure's variables field from the captured variables.
-  // variables_size is specified in bytes, and only includes enough room to write variables into.
-  // Returns the calculated actual size of the closure.
-  size_t WriteValues(ArtLambdaMethod* target_method,
-                     uint8_t variables[],
-                     size_t header_size,
-                     size_t variables_size) const SHARED_REQUIRES(Locks::mutator_lock_);
-
-  size_t size_ = kInitialSize;
-  bool is_dynamic_size_ = false;
-  std::vector<ShortyFieldTypeTraits::MaxType> values_;
-  std::string shorty_types_;
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_CLOSURE_BUILDER_H_
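// A hypothetical end-to-end sketch of the builder flow declared above; the
// function name, the "IJ" capture list, and the caller-supplied memory are
// assumptions. Requires lambda/closure_builder-inl.h for CaptureVariablePrimitive,
// and the caller must hold the mutator lock for the builder's lifetime.
art::lambda::Closure* BuildExampleClosure(void* memory,
                                          size_t memory_size,
                                          art::lambda::ArtLambdaMethod* lambda_method)
    SHARED_REQUIRES(art::Locks::mutator_lock_) {
  art::lambda::ClosureBuilder builder;
  builder.CaptureVariablePrimitive<int32_t>(42);             // 'I'
  builder.CaptureVariablePrimitive<int64_t>(INT64_C(1234));  // 'J'
  CHECK_GE(memory_size, builder.GetSize());  // Memory must be at least GetSize() bytes.
  return builder.CreateInPlace(memory, lambda_method);
}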
diff --git a/runtime/lambda/closure_test.cc b/runtime/lambda/closure_test.cc
deleted file mode 100644
index 7c1bd0d..0000000
--- a/runtime/lambda/closure_test.cc
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "lambda/art_lambda_method.h"
-#include "lambda/closure.h"
-#include "lambda/closure_builder.h"
-#include "lambda/closure_builder-inl.h"
-#include "utils.h"
-
-#include <numeric>
-#include <stdint.h>
-#include <type_traits>
-#include "gtest/gtest.h"
-
-// Turn this on for some extra printfs to help with debugging, since some code is optimized out.
-static constexpr const bool kDebuggingClosureTest = true;
-
-namespace std {
-  using Closure = art::lambda::Closure;
-
-  // Specialize std::default_delete so it knows how to properly delete closures
-  // through the way we allocate them in this test.
-  //
-  // This is test-only because we don't want the rest of Art to do this.
-  template <>
-  struct default_delete<Closure> {
-    void operator()(Closure* closure) const {
-      delete[] reinterpret_cast<char*>(closure);
-    }
-  };
-}  // namespace std
-
-namespace art {
-
-// Fake lock acquisition to please clang lock checker.
-// This doesn't actually acquire any locks because we don't need multiple threads in this gtest.
-struct SCOPED_CAPABILITY ScopedFakeLock {
-  explicit ScopedFakeLock(MutatorMutex& mu) ACQUIRE(mu)
-      : mu_(mu) {
-  }
-
-  ~ScopedFakeLock() RELEASE()
-  {}
-
-  MutatorMutex& mu_;
-};
-
-namespace lambda {
-
-class ClosureTest : public ::testing::Test {
- public:
-  ClosureTest() = default;
-  ~ClosureTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-    // Create a completely dummy method here.
-    // It's "OK" because the Closure never needs to look inside of the ArtMethod
-    // (it just needs to be non-null).
-    uintptr_t ignore = 0xbadbad;
-    fake_method_ = reinterpret_cast<ArtMethod*>(ignore);
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variable types must match the ones in 'args'.
-  // -- This creates the closure directly in-memory by using memcpy.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariables(ArtLambdaMethod* lambda_method,
-                                                               Args&& ... args) {
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // Set up the data
-    closure_ptr->lambda_info_ = lambda_method;
-    CopyArgs(closure_ptr->captured_[0].static_variables_, args ...);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Copy variadic arguments into the destination array with memcpy.
-  template <typename T, typename ... Args>
-  static void CopyArgs(uint8_t destination[], T&& arg, Args&& ... args) {
-    memcpy(destination, &arg, sizeof(arg));
-    CopyArgs(destination + sizeof(arg), args ...);
-  }
-
-  // Base case: Done.
-  static void CopyArgs(uint8_t destination[]) {
-    UNUSED(destination);
-  }
-
-  // Create a closure that captures the static variables from 'args' by-value.
-  // The lambda method's captured variable types must match the ones in 'args'.
-  // -- This uses the ClosureBuilder interface to set up the closure indirectly.
-  template <typename ... Args>
-  static std::unique_ptr<Closure> CreateClosureStaticVariablesFromBuilder(
-      ArtLambdaMethod* lambda_method,
-      Args&& ... args) {
-    // Acquire a fake lock since closure_builder needs it.
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-
-    ClosureBuilder closure_builder;
-    CaptureVariableFromArgsList(/*out*/closure_builder, args ...);
-
-    EXPECT_EQ(sizeof...(args), closure_builder.GetCaptureCount());
-
-    constexpr size_t header_size = sizeof(ArtLambdaMethod*);
-    const size_t static_size = GetArgsSize(args ...) + header_size;
-    EXPECT_GE(static_size, sizeof(Closure));
-
-    // For static variables, no nested closure, so size must match exactly.
-    EXPECT_EQ(static_size, closure_builder.GetSize());
-
-    // Can't just 'new' the Closure since we don't know the size up front.
-    char* closure_as_char_array = new char[static_size];
-    Closure* closure_ptr = new (closure_as_char_array) Closure;
-
-    // The closure builder packs the captured variables into a Closure.
-    closure_builder.CreateInPlace(closure_ptr, lambda_method);
-
-    // Make sure the entire thing is deleted once the unique_ptr goes out of scope.
-    return std::unique_ptr<Closure>(closure_ptr);  // NOLINT [whitespace/braces] [5]
-  }
-
-  // Call the correct ClosureBuilder::CaptureVariableXYZ function based on the type of args.
-  // Invokes for each arg in args.
-  template <typename ... Args>
-  static void CaptureVariableFromArgsList(/*out*/ClosureBuilder& closure_builder, Args ... args) {
-    int ignore[] = {
-        (CaptureVariableFromArgs(/*out*/closure_builder, args),0)...  // NOLINT [whitespace/comma] [3]
-    };
-    UNUSED(ignore);
-  }
-
-  // ClosureBuilder::CaptureVariablePrimitive for types that are primitive only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, T value) {
-    static_assert(ShortyFieldTypeTraits::IsPrimitiveType<T>(), "T must be a shorty primitive");
-    closure_builder.CaptureVariablePrimitive<T, ShortyFieldTypeSelectEnum<T>::value>(value);
-  }
-
-  // ClosureBuilder::CaptureVariableObject for types that are objects only.
-  template <typename T>
-  typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  static CaptureVariableFromArgs(/*out*/ClosureBuilder& closure_builder, const T* object) {
-    ScopedFakeLock fake_lock(*Locks::mutator_lock_);
-    closure_builder.CaptureVariableObject(object);
-  }
-
-  // Sum of sizeof(Args...).
-  template <typename T, typename ... Args>
-  static constexpr size_t GetArgsSize(T&& arg, Args&& ... args) {
-    return sizeof(arg) + GetArgsSize(args ...);
-  }
-
-  // Base case: Done.
-  static constexpr size_t GetArgsSize() {
-    return 0;
-  }
-
-  // Take "U" and memcpy it into a "T". T starts out as (T)0.
-  template <typename T, typename U>
-  static T ExpandingBitCast(const U& val) {
-    static_assert(sizeof(T) >= sizeof(U), "U too large");
-    T new_val = static_cast<T>(0);
-    memcpy(&new_val, &val, sizeof(U));
-    return new_val;
-  }
-
-  // Templatized extraction from closures by checking their type with enable_if.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint32_t>(value), closure->GetCapturedPrimitiveNarrow(index))
-        << " with index " << index;
-  }
-
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsPrimitiveWideType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, T value) {
-    EXPECT_EQ(ExpandingBitCast<uint64_t>(value), closure->GetCapturedPrimitiveWide(index))
-        << " with index " << index;
-  }
-
-  // Templatized SFINAE for Objects so we can get better error messages.
-  template <typename T>
-  static typename std::enable_if<ShortyFieldTypeTraits::IsObjectType<T>()>::type
-  ExpectCapturedVariable(const Closure* closure, size_t index, const T* object) {
-    EXPECT_EQ(object, closure->GetCapturedObject(index))
-        << " with index " << index;
-  }
-
-  template <typename ... Args>
-  void TestPrimitive(const char *descriptor, Args ... args) {
-    const char* shorty = descriptor;
-
-    SCOPED_TRACE(descriptor);
-
-    ASSERT_EQ(strlen(shorty), sizeof...(args))
-        << "test error: descriptor must have same # of types as the # of captured variables";
-
-    // Important: This fake lambda method needs to outlive any Closures we create with it.
-    ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                  descriptor,                      // NOLINT [whitespace/blank_line] [2]
-                                  shorty,
-                                 };
-
-    std::unique_ptr<Closure> closure_a;
-    std::unique_ptr<Closure> closure_b;
-
-    // Test the closure twice when it's constructed in different ways.
-    {
-      // Create the closure in a "raw" manner, that is directly with memcpy
-      // since we know the underlying data format.
-      // This simulates how the compiler would lay out the data directly.
-      SCOPED_TRACE("raw closure");
-      std::unique_ptr<Closure> closure_raw = CreateClosureStaticVariables(&lambda_method, args ...);
-
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure raw address: " << closure_raw.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_raw.get(), descriptor, shorty, args ...);
-      closure_a = std::move(closure_raw);
-    }
-
-    {
-      // Create the closure with the ClosureBuilder, which is done indirectly.
-      // This simulates how the interpreter would create the closure dynamically at runtime.
-      SCOPED_TRACE("closure from builder");
-      std::unique_ptr<Closure> closure_built =
-          CreateClosureStaticVariablesFromBuilder(&lambda_method, args ...);
-      if (kDebuggingClosureTest) {
-        std::cerr << "closure built address: " << closure_built.get() << std::endl;
-      }
-      TestPrimitiveWithClosure(closure_built.get(), descriptor, shorty, args ...);
-      closure_b = std::move(closure_built);
-    }
-
-    // The closures should be identical memory-wise as well.
-    EXPECT_EQ(closure_a->GetSize(), closure_b->GetSize());
-    EXPECT_TRUE(memcmp(closure_a.get(),
-                       closure_b.get(),
-                       std::min(closure_a->GetSize(), closure_b->GetSize())) == 0);
-  }
-
-  template <typename ... Args>
-  static void TestPrimitiveWithClosure(Closure* closure,
-                                       const char* descriptor,
-                                       const char* shorty,
-                                       Args ... args) {
-    EXPECT_EQ(sizeof(ArtLambdaMethod*) + GetArgsSize(args...), closure->GetSize());
-    EXPECT_EQ(sizeof...(args), closure->GetNumberOfCapturedVariables());
-    EXPECT_STREQ(descriptor, closure->GetCapturedVariablesTypeDescriptor());
-    TestPrimitiveExpects(closure, shorty, /*index*/0, args ...);
-  }
-
-  // Call EXPECT_EQ for each argument in the closure's #GetCapturedX.
-  template <typename T, typename ... Args>
-  static void TestPrimitiveExpects(
-      const Closure* closure, const char* shorty, size_t index, T arg, Args ... args) {
-    ASSERT_EQ(ShortyFieldType(shorty[index]).GetStaticSize(), sizeof(T))
-        << "Test error: Type mismatch at index " << index;
-    ExpectCapturedVariable(closure, index, arg);
-    EXPECT_EQ(ShortyFieldType(shorty[index]), closure->GetCapturedShortyType(index));
-    TestPrimitiveExpects(closure, shorty, index + 1, args ...);
-  }
-
-  // Base case for EXPECT_EQ.
-  static void TestPrimitiveExpects(const Closure* closure, const char* shorty, size_t index) {
-    UNUSED(closure, shorty, index);
-  }
-
-  ArtMethod* fake_method_;
-};
-
-TEST_F(ClosureTest, TestTrivial) {
-  ArtLambdaMethod lambda_method{fake_method_,                    // NOLINT [whitespace/braces] [5]
-                                "",  // No captured variables    // NOLINT [whitespace/blank_line] [2]
-                                "",  // No captured variables
-                               };
-
-  std::unique_ptr<Closure> closure = CreateClosureStaticVariables(&lambda_method);
-
-  EXPECT_EQ(sizeof(ArtLambdaMethod*), closure->GetSize());
-  EXPECT_EQ(0u, closure->GetNumberOfCapturedVariables());
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveSingle) {
-  TestPrimitive("Z", true);
-  TestPrimitive("B", int8_t(0xde));
-  TestPrimitive("C", uint16_t(0xbeef));
-  TestPrimitive("S", int16_t(0xdead));
-  TestPrimitive("I", int32_t(0xdeadbeef));
-  TestPrimitive("F", 0.123f);
-  TestPrimitive("J", int64_t(0xdeadbeef00c0ffee));
-  TestPrimitive("D", 123.456);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMany) {
-  TestPrimitive("ZZ", true, false);
-  TestPrimitive("ZZZ", true, false, true);
-  TestPrimitive("BBBB", int8_t(0xde), int8_t(0xa0), int8_t(0xff), int8_t(0xcc));
-  TestPrimitive("CC", uint16_t(0xbeef), uint16_t(0xdead));
-  TestPrimitive("SSSS", int16_t(0xdead), int16_t(0xc0ff), int16_t(0xf000), int16_t(0xbaba));
-  TestPrimitive("III", int32_t(0xdeadbeef), int32_t(0xc0ffee), int32_t(0xbeefdead));
-  TestPrimitive("FF", 0.123f, 555.666f);
-  TestPrimitive("JJJ", int64_t(0xdeadbeef00c0ffee), int64_t(0x123), int64_t(0xc0ffee));
-  TestPrimitive("DD", 123.456, 777.888);
-}  // TEST_F
-
-TEST_F(ClosureTest, TestPrimitiveMixed) {
-  TestPrimitive("ZZBBCCSSIIFFJJDD",
-                true, false,
-                int8_t(0xde), int8_t(0xa0),
-                uint16_t(0xbeef), uint16_t(0xdead),
-                int16_t(0xdead), int16_t(0xc0ff),
-                int32_t(0xdeadbeef), int32_t(0xc0ffee),
-                0.123f, 555.666f,
-                int64_t(0xdeadbeef00c0ffee), int64_t(0x123),
-                123.456, 777.888);
-}  // TEST_F
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc
deleted file mode 100644
index 22bb294..0000000
--- a/runtime/lambda/leaking_allocator.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/bit_utils.h"
-#include "lambda/leaking_allocator.h"
-#include "linear_alloc.h"
-#include "runtime.h"
-
-namespace art {
-namespace lambda {
-
-void* LeakingAllocator::AllocateMemoryImpl(Thread* self, size_t byte_size, size_t align_size) {
-  // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data.
-  void* mem = Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size);
-  DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(mem), align_size);
-  return mem;
-}
-
-}  // namespace lambda
-}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h
deleted file mode 100644
index cb5a1bf..0000000
--- a/runtime/lambda/leaking_allocator.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
-
-#include <utility>  // std::forward
-#include <type_traits>  // std::aligned_storage
-
-namespace art {
-class Thread;  // forward declaration
-
-namespace lambda {
-
-// Temporary class to centralize all the leaking allocations.
-// Allocations made through this class are never freed, but it is a placeholder
-// that means that the calling code needs to be rewritten to properly:
-//
-// (a) Have a lifetime scoped to some other entity.
-// (b) Not be allocated over and over again if it was already allocated once (immutable data).
-//
-// TODO: do all of the above a/b for each callsite, and delete this class.
-class LeakingAllocator {
- public:
-  // An opaque type which is guaranteed to:
-  // * a) be large enough to hold T (e.g. for in-place new)
-  // * b) be well-aligned to T (so that reads/writes are well-defined)
-  // * c) be strict-aliasing compatible with T*
-  //
-  // Nominally used to allocate memory for yet unconstructed instances of T.
-  template <typename T>
-  using AlignedMemoryStorage = typename std::aligned_storage<sizeof(T), alignof(T)>::type;
-
-  // Allocate byte_size bytes worth of memory. Never freed.
-  template <typename T>
-  static AlignedMemoryStorage<T>* AllocateMemory(Thread* self, size_t byte_size = sizeof(T)) {
-    return reinterpret_cast<AlignedMemoryStorage<T>*>(
-        AllocateMemoryImpl(self, byte_size, alignof(T)));
-  }
-
-  // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) {
-    return new (AllocateMemory<T>(self, byte_size)) T(std::forward<Args>(args)...);
-  }
-
-  // Make a new instance of T in-place at newly allocated memory. Never freed.
-  template <typename T, typename... Args>
-  static T* MakeInstance(Thread* self, Args&&... args) {
-    return new (AllocateMemory<T>(self, sizeof(T))) T(std::forward<Args>(args)...);
-  }
-
- private:
-  static void* AllocateMemoryImpl(Thread* self, size_t byte_size, size_t align_size);
-};
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
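// A hypothetical call-site sketch for the allocator declared above; BoxedLambda
// and AllocateBoxedLambda are made-up names for illustration.
struct BoxedLambda {
  uint32_t flags;
  uint64_t payload;
};

BoxedLambda* AllocateBoxedLambda(art::Thread* self) {
  // The returned memory comes from the runtime's LinearAlloc and is never freed.
  return art::lambda::LeakingAllocator::MakeInstance<BoxedLambda>(self);
}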
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
deleted file mode 100644
index 46ddaa9..0000000
--- a/runtime/lambda/shorty_field_type.h
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-#define ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "globals.h"
-#include "runtime/primitive.h"
-
-#include <ostream>
-
-namespace art {
-
-namespace mirror {
-class Object;  // forward declaration
-}  // namespace mirror
-
-namespace lambda {
-
-struct Closure;  // forward declaration
-
-// TODO: Refactor together with primitive.h
-
-// The short form of a field type descriptor. Corresponds to ShortyFieldType in dex specification.
-// Only types usable by a field (and locals) are allowed (i.e. no void type).
-// Note that arrays and objects are treated both as 'L'.
-//
-// This is effectively a 'char' enum-like zero-cost type-safe wrapper with extra helper functions.
-struct ShortyFieldType : ValueObject {
-  // Use as if this was an enum class, e.g. 'ShortyFieldType::kBoolean'.
-  enum : char {
-    // Primitives (Narrow):
-    kBoolean = 'Z',
-    kByte = 'B',
-    kChar = 'C',
-    kShort = 'S',
-    kInt = 'I',
-    kFloat = 'F',
-    // Primitives (Wide):
-    kLong = 'J',
-    kDouble = 'D',
-    // Managed types:
-    kObject = 'L',  // This can also be an array (which is otherwise '[' in a non-shorty).
-    kLambda = '\\',
-  };  // NOTE: This is an anonymous enum so we can get exhaustive switch checking from the compiler.
-
-  // Implicitly construct from the enum above. Value must be one of the enum list members above.
-  // Always safe to use, does not do any DCHECKs.
-  inline constexpr ShortyFieldType(decltype(kByte) c) : value_(c) {
-  }
-
-  // Default constructor. The initial value is undefined. Initialize before calling methods.
-  // This is very unsafe, but it exists as a convenience for values that are initialized later.
-  explicit ShortyFieldType() : value_(StaticCastValue(0)) {
-  }
-
-  // Explicitly construct from a char. Value must be one of the enum list members above.
-  // Conversion is potentially unsafe, so DCHECKing is performed.
-  explicit inline ShortyFieldType(char c) : value_(StaticCastValue(c)) {
-    if (kIsDebugBuild) {
-      // Verify at debug-time that our conversion is safe.
-      ShortyFieldType ignored;
-      DCHECK(MaybeCreate(c, &ignored)) << "unknown shorty field type '" << c << "'";
-    }
-  }
-
-  // Attempts to parse the character in 'shorty_field_type' into its strongly typed version.
-  // Returns false if the character was out of range of the grammar.
-  static bool MaybeCreate(char shorty_field_type, ShortyFieldType* out) {
-    DCHECK(out != nullptr);
-    switch (shorty_field_type) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-      case kLong:
-      case kDouble:
-      case kObject:
-      case kLambda:
-        *out = ShortyFieldType(static_cast<decltype(kByte)>(shorty_field_type));
-        return true;
-      default:
-        break;
-    }
-
-    return false;
-  }
-
-  // Convert the first type in a field type descriptor string into a shorty.
-  // Arrays are converted into objects.
-  // Does not work for 'void' types (as they are illegal in a field type descriptor).
-  static ShortyFieldType CreateFromFieldTypeDescriptor(const char* field_type_descriptor) {
-    DCHECK(field_type_descriptor != nullptr);
-    char c = *field_type_descriptor;
-    if (UNLIKELY(c == kArray)) {  // Arrays are treated as object references.
-      c = kObject;
-    }
-    return ShortyFieldType{c};  // NOLINT [readability/braces] [4]
-  }
-
-  // Parse the first type in the field type descriptor string into a shorty.
-  // See CreateFromFieldTypeDescriptor for more details.
-  //
-  // Returns a pointer into the middle of the field_type_descriptor
-  // that either points to the next shorty type, or is null if there are
-  // no more types.
-  //
-  // DCHECKs that each of the nested types is a valid shorty field type. This
-  // means the type descriptor must be already valid.
-  static const char* ParseFromFieldTypeDescriptor(const char* field_type_descriptor,
-                                                  ShortyFieldType* out_type) {
-    DCHECK(field_type_descriptor != nullptr);
-
-    if (UNLIKELY(field_type_descriptor[0] == '\0')) {
-      // Handle empty strings by immediately returning null.
-      return nullptr;
-    }
-
-    // All non-empty strings must be a valid list of field type descriptors, otherwise
-    // the DCHECKs will kick in and the program will crash.
-    const char shorter_type = *field_type_descriptor;
-
-    ShortyFieldType safe_type;
-    bool type_set = MaybeCreate(shorter_type, &safe_type);
-
-    // Lambda that keeps skipping characters until it sees ';'.
-    // Stops one character -after- the ';'.
-    auto skip_until_semicolon = [&field_type_descriptor]() {
-      while (*field_type_descriptor != ';' && *field_type_descriptor != '\0') {
-        ++field_type_descriptor;
-      }
-      DCHECK_NE(*field_type_descriptor, '\0')
-          << " type descriptor terminated too early: " << field_type_descriptor;
-      ++field_type_descriptor;  // Skip the ';'
-    };
-
-    ++field_type_descriptor;
-    switch (shorter_type) {
-      case kObject:
-        skip_until_semicolon();
-
-        DCHECK(type_set);
-        DCHECK(safe_type == kObject);
-        break;
-      case kArray:
-        // Strip out all of the leading [[[[[s; we don't care if it's a multi-dimensional array.
-        while (*field_type_descriptor == '[' && *field_type_descriptor != '\0') {
-          ++field_type_descriptor;
-        }
-        DCHECK_NE(*field_type_descriptor, '\0')
-            << " type descriptor terminated too early: " << field_type_descriptor;
-        // Either a primitive, object, or closure left. No more arrays.
-        {
-          // Now skip all the characters that form the array's interior-most element type
-          // (which itself is guaranteed not to be an array).
-          ShortyFieldType array_interior_type;
-          type_set = MaybeCreate(*field_type_descriptor, &array_interior_type);
-          DCHECK(type_set) << " invalid remaining type descriptor " << field_type_descriptor;
-
-          // Handle array-of-objects case like [[[[[LObject; and array-of-closures like [[[[[\Foo;
-          if (*field_type_descriptor == kObject || *field_type_descriptor == kLambda) {
-            skip_until_semicolon();
-          } else {
-            // Handle primitives which are exactly one character we can skip.
-            DCHECK(array_interior_type.IsPrimitive());
-            ++field_type_descriptor;
-          }
-        }
-
-        safe_type = kObject;
-        type_set = true;
-        break;
-      case kLambda:
-        skip_until_semicolon();
-
-        DCHECK(safe_type == kLambda);
-        DCHECK(type_set);
-        break;
-      default:
-        DCHECK_NE(kVoid, shorter_type) << "cannot make a ShortyFieldType from a void type";
-        break;
-    }
-
-    DCHECK(type_set) << "invalid shorty type descriptor " << shorter_type;
-
-    *out_type = safe_type;
-    return type_set ? field_type_descriptor : nullptr;
-  }
-
-  // Explicitly convert to a char.
-  inline explicit operator char() const {
-    return value_;
-  }
-
-  // Is this a primitive?
-  inline bool IsPrimitive() const {
-    return IsPrimitiveNarrow() || IsPrimitiveWide();
-  }
-
-  // Is this a narrow primitive (i.e. can fit into 1 virtual register)?
-  inline bool IsPrimitiveNarrow() const {
-    switch (value_) {
-      case kBoolean:
-      case kByte:
-      case kChar:
-      case kShort:
-      case kInt:
-      case kFloat:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this a wide primitive (i.e. needs exactly 2 virtual registers)?
-  inline bool IsPrimitiveWide() const {
-    switch (value_) {
-      case kLong:
-      case kDouble:
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  // Is this an object reference (which can also be an array)?
-  inline bool IsObject() const {
-    return value_ == kObject;
-  }
-
-  // Is this a lambda?
-  inline bool IsLambda() const {
-    return value_ == kLambda;
-  }
-
-  // Is the size of this (to store inline as a field) always known at compile-time?
-  inline bool IsStaticSize() const {
-    return !IsLambda();
-  }
-
-  // Get the compile-time size (to be able to store it inline as a field or on stack).
-  // Dynamically-sized values such as lambdas return the guaranteed lower bound.
-  inline size_t GetStaticSize() const {
-    switch (value_) {
-      case kBoolean:
-        return sizeof(bool);
-      case kByte:
-        return sizeof(uint8_t);
-      case kChar:
-        return sizeof(uint16_t);
-      case kShort:
-        return sizeof(int16_t);
-      case kInt:
-        return sizeof(int32_t);
-      case kLong:
-        return sizeof(int64_t);
-      case kFloat:
-        return sizeof(float);
-      case kDouble:
-        return sizeof(double);
-      case kObject:
-        return kObjectReferenceSize;
-      case kLambda:
-        return sizeof(void*);  // Large enough to store the ArtLambdaMethod pointer.
-      default:
-        DCHECK(false) << "unknown shorty field type '" << static_cast<char>(value_) << "'";
-        UNREACHABLE();
-    }
-  }
-
-  // Implicitly convert to the anonymous nested inner type. Used for exhaustive switch detection.
-  inline operator decltype(kByte)() const {
-    return value_;
-  }
-
-  // Returns a read-only static string representing the enum name, useful for printing/debug only.
-  inline const char* ToString() const {
-    switch (value_) {
-      case kBoolean:
-        return "kBoolean";
-      case kByte:
-        return "kByte";
-      case kChar:
-        return "kChar";
-      case kShort:
-        return "kShort";
-      case kInt:
-        return "kInt";
-      case kLong:
-        return "kLong";
-      case kFloat:
-        return "kFloat";
-      case kDouble:
-        return "kDouble";
-      case kObject:
-        return "kObject";
-      case kLambda:
-        return "kLambda";
-      default:
-        // Undefined behavior if we get this far. Pray the compiler gods are merciful.
-        return "<undefined>";
-    }
-  }
-
- private:
-  static constexpr const char kArray = '[';
-  static constexpr const char kVoid  = 'V';
-
-  // Helper to statically cast anything into our nested anonymous enum type.
-  template <typename T>
-  inline static decltype(kByte) StaticCastValue(const T& anything) {
-    return static_cast<decltype(value_)>(anything);
-  }
-
-  // The only field in this struct.
-  decltype(kByte) value_;
-};
-
-
-// Print to an output stream.
-inline std::ostream& operator<<(std::ostream& ostream, ShortyFieldType shorty) {
-  return ostream << shorty.ToString();
-}
-
-static_assert(sizeof(ShortyFieldType) == sizeof(char),
-              "ShortyFieldType must be lightweight just like a char");
-
-// Compile-time trait information regarding the ShortyFieldType.
-// Used by static_asserts to verify that the templates are correctly used at compile-time.
-//
-// For example,
-//     ShortyFieldTypeTraits::IsPrimitiveWideType<int64_t>() == true
-//     ShortyFieldTypeTraits::IsObjectType<mirror::Object*>() == true
-struct ShortyFieldTypeTraits {
-  // A type guaranteed to be large enough to holds any of the shorty field types.
-  using MaxType = uint64_t;
-
-  // Type traits: Returns true if 'T' is a valid type that can be represented by a shorty field type.
-  template <typename T>
-  static inline constexpr bool IsType() {
-    return IsPrimitiveType<T>() || IsObjectType<T>() || IsLambdaType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type (i.e. a built-in without nested references).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveType() {
-    return IsPrimitiveNarrowType<T>() || IsPrimitiveWideType<T>();
-  }
-
-  // Returns true if 'T' is a primitive type that is narrow (i.e. can be stored into 1 vreg).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveNarrowType() {
-    return IsPrimitiveNarrowTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a primitive type that is wide (i.e. needs 2 vregs for storage).
-  template <typename T>
-  static inline constexpr bool IsPrimitiveWideType() {
-    return IsPrimitiveWideTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is an object (i.e. it is a managed GC reference).
-  // Note: This is roughly equivalent to std::is_base_of<mirror::Object, std::remove_pointer<T>::type>::value
-  template <typename T>
-  static inline constexpr bool IsObjectType() {
-    return IsObjectTypeImpl(static_cast<T* const>(nullptr));
-  }
-
-  // Returns true if 'T' is a lambda (i.e. it is a closure with unknown static data).
-  template <typename T>
-  static inline constexpr bool IsLambdaType() {
-    return IsLambdaTypeImpl(static_cast<T* const>(nullptr));
-  }
-
- private:
-#define IS_VALID_TYPE_SPECIALIZATION(type, name) \
-  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { \
-    return true; \
-  } \
-  \
-  static_assert(sizeof(MaxType) >= sizeof(type), "MaxType too small")
-
-  IS_VALID_TYPE_SPECIALIZATION(bool, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int8_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint8_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(int16_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint16_t, PrimitiveNarrow);  // Chars are unsigned.
-  IS_VALID_TYPE_SPECIALIZATION(int32_t, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(uint32_t, PrimitiveNarrow);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(float, PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION(int64_t, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(uint64_t, PrimitiveWide);  // Not strictly true, but close enough.
-  IS_VALID_TYPE_SPECIALIZATION(double, PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION(mirror::Object*, Object);
-  IS_VALID_TYPE_SPECIALIZATION(Closure*, Lambda);
-#undef IS_VALID_TYPE_SPECIALIZATION
-
-#define IS_VALID_TYPE_SPECIALIZATION_IMPL(name) \
-  template <typename T> \
-  static inline constexpr bool Is ## name ## TypeImpl(T* const = 0) { \
-    return false; \
-  }
-
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveNarrow);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(PrimitiveWide);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Object);
-  IS_VALID_TYPE_SPECIALIZATION_IMPL(Lambda);
-
-#undef IS_VALID_TYPE_SPECIALIZATION_IMPL
-};
-
-// Maps the ShortyFieldType enum into its C++ type equivalent, into the "type" typedef.
-// For example:
-//     ShortyFieldTypeSelectType<ShortyFieldType::kBoolean>::type => bool
-//     ShortyFieldTypeSelectType<ShortyFieldType::kLong>::type => int64_t
-//
-// Invalid enums will not have the type defined.
-template <decltype(ShortyFieldType::kByte) Shorty>
-struct ShortyFieldTypeSelectType {
-};
-
-// Maps the C++ type into its ShortyFieldType enum equivalent, into the "value" constexpr.
-// For example:
-//     ShortyFieldTypeSelectEnum<bool>::value => ShortyFieldType::kBoolean
-//     ShortyFieldTypeSelectEnum<int64_t>::value => ShortyFieldType::kLong
-//
-// Signedness must match for a valid select, e.g. uint64_t will not map to kLong, but int64_t will.
-// Invalid types will not have the value defined (see e.g. ShortyFieldTypeTraits::IsType<T>()).
-template <typename T>
-struct ShortyFieldTypeSelectEnum {
-};
-
-#define SHORTY_FIELD_TYPE_SELECT_IMPL(cpp_type, enum_element)      \
-template <> \
-struct ShortyFieldTypeSelectType<ShortyFieldType::enum_element> { \
-  using type = cpp_type; \
-}; \
-\
-template <> \
-struct ShortyFieldTypeSelectEnum<cpp_type> { \
-  static constexpr const auto value = ShortyFieldType::enum_element; \
-}; \
-
-SHORTY_FIELD_TYPE_SELECT_IMPL(bool, kBoolean);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int8_t, kByte);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int16_t, kShort);
-SHORTY_FIELD_TYPE_SELECT_IMPL(uint16_t, kChar);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int32_t, kInt);
-SHORTY_FIELD_TYPE_SELECT_IMPL(float, kFloat);
-SHORTY_FIELD_TYPE_SELECT_IMPL(int64_t, kLong);
-SHORTY_FIELD_TYPE_SELECT_IMPL(double, kDouble);
-SHORTY_FIELD_TYPE_SELECT_IMPL(mirror::Object*, kObject);
-SHORTY_FIELD_TYPE_SELECT_IMPL(Closure*, kLambda);
-
-}  // namespace lambda
-}  // namespace art
-
-#endif  // ART_RUNTIME_LAMBDA_SHORTY_FIELD_TYPE_H_
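// A hypothetical sketch of walking a captured-variables type descriptor with
// ParseFromFieldTypeDescriptor, mirroring how the lambda code consumed these
// descriptors; CountCapturedVariables and the sample descriptor are made up.
size_t CountCapturedVariables(const char* descriptor) {
  size_t count = 0;
  art::lambda::ShortyFieldType shorty;
  const char* cursor = descriptor;
  while ((cursor = art::lambda::ShortyFieldType::ParseFromFieldTypeDescriptor(cursor, &shorty)) != nullptr) {
    ++count;  // e.g. "I[LObject;\Foo;" yields kInt, kObject, kLambda -> 3.
  }
  return count;
}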
diff --git a/runtime/lambda/shorty_field_type_test.cc b/runtime/lambda/shorty_field_type_test.cc
deleted file mode 100644
index 32bade9..0000000
--- a/runtime/lambda/shorty_field_type_test.cc
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lambda/shorty_field_type.h"
-#include "mirror/object_reference.h"
-
-#include "utils.h"
-#include <numeric>
-#include <stdint.h>
-#include "gtest/gtest.h"
-
-#define EXPECT_NULL(expected) EXPECT_EQ(reinterpret_cast<const void*>(expected), \
-                                        reinterpret_cast<void*>(nullptr));
-
-namespace art {
-namespace lambda {
-
-class ShortyFieldTypeTest : public ::testing::Test {
- public:
-  ShortyFieldTypeTest() = default;
-  ~ShortyFieldTypeTest() = default;
-
- protected:
-  static void SetUpTestCase() {
-  }
-
-  virtual void SetUp() {
-  }
-
-  static ::testing::AssertionResult IsResultSuccessful(bool result) {
-    if (result) {
-      return ::testing::AssertionSuccess();
-    } else {
-      return ::testing::AssertionFailure();
-    }
-  }
-
-  template <typename T>
-  static std::string ListToString(const T& list) {
-    std::stringstream stream;
-
-    stream << "[";
-    for (auto&& val : list) {
-      stream << val << ", ";
-    }
-    stream << "]";
-
-    return stream.str();
-  }
-
-  // Compare two vector-like types for equality.
-  template <typename T>
-  static ::testing::AssertionResult AreListsEqual(const T& expected, const T& actual) {
-    bool success = true;
-    std::stringstream stream;
-
-    if (expected.size() != actual.size()) {
-      success = false;
-      stream << "Expected list size: " << expected.size()
-             << ", but got list size: " << actual.size();
-      stream << std::endl;
-    }
-
-    for (size_t j = 0; j < std::min(expected.size(), actual.size()); ++j) {
-      if (expected[j] != actual[j]) {
-        success = false;
-        stream << "Expected element '" << j << "' to be '" << expected[j] << "', but got actual: '"
-               << actual[j] << "'.";
-        stream << std::endl;
-      }
-    }
-
-    if (success) {
-      return ::testing::AssertionSuccess();
-    }
-
-    stream << "Expected list was: " << ListToString(expected)
-           << ", actual list was: " << ListToString(actual);
-
-    return ::testing::AssertionFailure() << stream.str();
-  }
-
-  static std::vector<ShortyFieldType> ParseLongTypeDescriptorsToList(const char* type_descriptor) {
-    std::vector<ShortyFieldType> lst;
-
-    ShortyFieldType shorty;
-
-    const char* parsed = type_descriptor;
-    while ((parsed = ShortyFieldType::ParseFromFieldTypeDescriptor(parsed, &shorty)) != nullptr) {
-      lst.push_back(shorty);
-    }
-
-    return lst;
-  }
-
- protected:
-  // Shorthands for the ShortyFieldType constants.
-  // The letters are the same as JNI letters, with kS_ being a lambda since \ is not available.
-  static constexpr ShortyFieldType kSZ = ShortyFieldType::kBoolean;
-  static constexpr ShortyFieldType kSB = ShortyFieldType::kByte;
-  static constexpr ShortyFieldType kSC = ShortyFieldType::kChar;
-  static constexpr ShortyFieldType kSS = ShortyFieldType::kShort;
-  static constexpr ShortyFieldType kSI = ShortyFieldType::kInt;
-  static constexpr ShortyFieldType kSF = ShortyFieldType::kFloat;
-  static constexpr ShortyFieldType kSJ = ShortyFieldType::kLong;
-  static constexpr ShortyFieldType kSD = ShortyFieldType::kDouble;
-  static constexpr ShortyFieldType kSL = ShortyFieldType::kObject;
-  static constexpr ShortyFieldType kS_ = ShortyFieldType::kLambda;
-};
-
-TEST_F(ShortyFieldTypeTest, TestMaybeCreate) {
-  ShortyFieldType shorty;
-
-  std::vector<char> shorties = {'Z', 'B', 'C', 'S', 'I', 'F', 'J', 'D', 'L', '\\'};
-
-  // All valid 'shorty' characters are created successfully.
-  for (const char c : shorties) {
-    EXPECT_TRUE(ShortyFieldType::MaybeCreate(c, &shorty)) << c;
-    EXPECT_EQ(c, static_cast<char>(c));
-  }
-
-  // All other characters can never be created.
-  for (unsigned char c = 0; c < std::numeric_limits<unsigned char>::max(); ++c) {
-    // Skip the valid characters.
-    if (std::find(shorties.begin(), shorties.end(), c) != shorties.end()) { continue; }
-    // All invalid characters should fail.
-    EXPECT_FALSE(ShortyFieldType::MaybeCreate(static_cast<char>(c), &shorty)) << c;
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestCreateFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      "Z", "B", "C", "S", "I", "F", "J", "D", "LObject;", "\\Closure;",
-      "[Z", "[[B", "[[LObject;"
-  };
-
-  // Expected output.
-  std::vector<ShortyFieldType> expected = {
-      ShortyFieldType::kBoolean,
-      ShortyFieldType::kByte,
-      ShortyFieldType::kChar,
-      ShortyFieldType::kShort,
-      ShortyFieldType::kInt,
-      ShortyFieldType::kFloat,
-      ShortyFieldType::kLong,
-      ShortyFieldType::kDouble,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kLambda,
-      // Arrays are always treated as objects.
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-      ShortyFieldType::kObject,
-  };
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < lengthies.size(); ++i) {
-    EXPECT_EQ(expected[i], ShortyFieldType::CreateFromFieldTypeDescriptor(lengthies[i]));
-  }
-}  // TEST_F
-
-TEST_F(ShortyFieldTypeTest, TestParseFromFieldTypeDescriptor) {
-  // Sample input.
-  std::vector<const char*> lengthies = {
-      // Empty list
-      "",
-      // Primitives
-      "Z", "B", "C", "S", "I", "F", "J", "D",
-      // Non-primitives
-      "LObject;", "\\Closure;",
-      // Arrays. The biggest PITA.
-      "[Z", "[[B", "[[LObject;", "[[[[\\Closure;",
-      // Multiple things at once:
-      "ZBCSIFJD",
-      "LObject;LObject;SSI",
-      "[[ZDDZ",
-      "[[LObject;[[Z[F\\Closure;LObject;",
-  };
-
-  // Expected output.
-  std::vector<std::vector<ShortyFieldType>> expected = {
-      // Empty list
-      {},
-      // Primitives
-      {kSZ}, {kSB}, {kSC}, {kSS}, {kSI}, {kSF}, {kSJ}, {kSD},
-      // Non-primitives.
-      { ShortyFieldType::kObject }, { ShortyFieldType::kLambda },
-      // Arrays are always treated as objects.
-      { kSL }, { kSL }, { kSL }, { kSL },
-      // Multiple things at once:
-      { kSZ, kSB, kSC, kSS, kSI, kSF, kSJ, kSD },
-      { kSL, kSL, kSS, kSS, kSI },
-      { kSL, kSD, kSD, kSZ },
-      { kSL, kSL, kSL, kS_, kSL },
-  };
-
-  // Sanity check that the expected/actual lists are the same size when adding new entries.
-  ASSERT_EQ(expected.size(), lengthies.size());
-
-  // All valid lengthy types are correctly turned into the expected shorty type.
-  for (size_t i = 0; i < expected.size(); ++i) {
-    const std::vector<ShortyFieldType>& expected_list = expected[i];
-    std::vector<ShortyFieldType> actual_list = ParseLongTypeDescriptorsToList(lengthies[i]);
-    EXPECT_TRUE(AreListsEqual(expected_list, actual_list));
-  }
-}  // TEST_F
-
-// Helper class to probe a shorty's characteristics by minimizing copy-and-paste tests.
-template <typename T, decltype(ShortyFieldType::kByte) kShortyEnum>
-struct ShortyTypeCharacteristics {
-  bool is_primitive_ = false;
-  bool is_primitive_narrow_ = false;
-  bool is_primitive_wide_ = false;
-  bool is_object_ = false;
-  bool is_lambda_ = false;
-  size_t size_ = sizeof(T);
-  bool is_dynamic_sized_ = false;
-
-  void CheckExpects() {
-    ShortyFieldType shorty = kShortyEnum;
-
-    // Test the main non-parsing-related ShortyFieldType characteristics.
-    EXPECT_EQ(is_primitive_, shorty.IsPrimitive());
-    EXPECT_EQ(is_primitive_narrow_, shorty.IsPrimitiveNarrow());
-    EXPECT_EQ(is_primitive_wide_, shorty.IsPrimitiveWide());
-    EXPECT_EQ(is_object_, shorty.IsObject());
-    EXPECT_EQ(is_lambda_, shorty.IsLambda());
-    EXPECT_EQ(size_, shorty.GetStaticSize());
-    EXPECT_EQ(is_dynamic_sized_, !shorty.IsStaticSize());
-
-    // Test compile-time ShortyFieldTypeTraits.
-    EXPECT_TRUE(ShortyFieldTypeTraits::IsType<T>());
-    EXPECT_EQ(is_primitive_, ShortyFieldTypeTraits::IsPrimitiveType<T>());
-    EXPECT_EQ(is_primitive_narrow_, ShortyFieldTypeTraits::IsPrimitiveNarrowType<T>());
-    EXPECT_EQ(is_primitive_wide_, ShortyFieldTypeTraits::IsPrimitiveWideType<T>());
-    EXPECT_EQ(is_object_, ShortyFieldTypeTraits::IsObjectType<T>());
-    EXPECT_EQ(is_lambda_, ShortyFieldTypeTraits::IsLambdaType<T>());
-
-    // Test compile-time ShortyFieldType selectors
-    static_assert(std::is_same<T, typename ShortyFieldTypeSelectType<kShortyEnum>::type>::value,
-                  "ShortyFieldType Enum->Type incorrect mapping");
-    auto kActualEnum = ShortyFieldTypeSelectEnum<T>::value;  // Do not ODR-use, avoid linker error.
-    EXPECT_EQ(kShortyEnum, kActualEnum);
-  }
-};
-
-TEST_F(ShortyFieldTypeTest, TestCharacteristicsAndTraits) {
-  // Boolean test
-  {
-    SCOPED_TRACE("boolean");
-    ShortyTypeCharacteristics<bool, ShortyFieldType::kBoolean> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Byte test
-  {
-    SCOPED_TRACE("byte");
-    ShortyTypeCharacteristics<int8_t, ShortyFieldType::kByte> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Char test
-  {
-    SCOPED_TRACE("char");
-    ShortyTypeCharacteristics<uint16_t, ShortyFieldType::kChar> chars;  // Char is unsigned.
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Short test
-  {
-    SCOPED_TRACE("short");
-    ShortyTypeCharacteristics<int16_t, ShortyFieldType::kShort> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Int test
-  {
-    SCOPED_TRACE("int");
-    ShortyTypeCharacteristics<int32_t, ShortyFieldType::kInt> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Long test
-  {
-    SCOPED_TRACE("long");
-    ShortyTypeCharacteristics<int64_t, ShortyFieldType::kLong> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Float test
-  {
-    SCOPED_TRACE("float");
-    ShortyTypeCharacteristics<float, ShortyFieldType::kFloat> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_narrow_ = true;
-    chars.CheckExpects();
-  }
-
-  // Double test
-  {
-    SCOPED_TRACE("double");
-    ShortyTypeCharacteristics<double, ShortyFieldType::kDouble> chars;
-    chars.is_primitive_ = true;
-    chars.is_primitive_wide_ = true;
-    chars.CheckExpects();
-  }
-
-  // Object test
-  {
-    SCOPED_TRACE("object");
-    ShortyTypeCharacteristics<mirror::Object*, ShortyFieldType::kObject> chars;
-    chars.is_object_ = true;
-    chars.size_ = kObjectReferenceSize;
-    chars.CheckExpects();
-    EXPECT_EQ(kObjectReferenceSize, sizeof(mirror::CompressedReference<mirror::Object>));
-  }
-
-  // Lambda test
-  {
-    SCOPED_TRACE("lambda");
-    ShortyTypeCharacteristics<Closure*, ShortyFieldType::kLambda> chars;
-    chars.is_lambda_ = true;
-    chars.is_dynamic_sized_ = true;
-    chars.CheckExpects();
-  }
-}
-
-}  // namespace lambda
-}  // namespace art
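
The deleted test mostly exercised how ParseFromFieldTypeDescriptor collapses long field-type descriptors into shorty types: primitives keep their character, reference types become objects, '\...;' stays a lambda, and arrays of anything are objects. A rough standalone sketch of that collapsing, over plain chars rather than the ART ShortyFieldType class and assuming well-formed descriptors:

#include <cassert>
#include <string>
#include <vector>

// Collapse a list of long field-type descriptors into shorty characters.
// Sketch only; assumes well-formed input and uses plain chars, not ShortyFieldType.
std::vector<char> ToShorties(const std::string& descriptors) {
  std::vector<char> out;
  size_t i = 0;
  while (i < descriptors.size()) {
    bool is_array = false;
    while (descriptors[i] == '[') {  // Skip array dimensions.
      is_array = true;
      ++i;
    }
    char c = descriptors[i++];
    if (c == 'L' || c == '\\') {
      while (descriptors[i - 1] != ';') {  // Consume the rest of "...;".
        ++i;
      }
    }
    out.push_back(is_array ? 'L' : c);  // Arrays are always objects.
  }
  return out;
}

int main() {
  assert((ToShorties("ZBCSIFJD") ==
          std::vector<char>{'Z', 'B', 'C', 'S', 'I', 'F', 'J', 'D'}));
  assert((ToShorties("[[LObject;[[Z[F\\Closure;LObject;") ==
          std::vector<char>{'L', 'L', 'L', '\\', 'L'}));
  return 0;
}
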
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 341501b..4a2a293 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -43,17 +43,15 @@
 
 inline size_t LockWord::ForwardingAddress() const {
   DCHECK_EQ(GetState(), kForwardingAddress);
-  return value_ << kStateSize;
+  return value_ << kForwardingAddressShift;
 }
 
 inline LockWord::LockWord() : value_(0) {
   DCHECK_EQ(GetState(), kUnlocked);
 }
 
-inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
-    : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
-             (kStateFat << kStateShift)) {
-  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+inline LockWord::LockWord(Monitor* mon, uint32_t gc_state)
+    : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) {
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 5d0d204..538b6eb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -35,27 +35,27 @@
  * the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding
  * address. When the lock word is in the "thin" state and its bits are formatted as follows:
  *
- *  |33|22|222222221111|1111110000000000|
- *  |10|98|765432109876|5432109876543210|
- *  |00|rb| lock count |thread id owner |
+ *  |33|2|2|222222221111|1111110000000000|
+ *  |10|9|8|765432109876|5432109876543210|
+ *  |00|m|r| lock count |thread id owner |
  *
  * When the lock word is in the "fat" state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |01|rb| MonitorId                  |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |01|m|r| MonitorId                  |
  *
  * When the lock word is in hash state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |10|rb| HashCode                   |
+ *  |33|2|2|2222222211111111110000000000|
+ *  |10|9|8|7654321098765432109876543210|
+ *  |10|m|r| HashCode                   |
  *
- * When the lock word is in fowarding address state and its bits are formatted as follows:
+ * When the lock word is in forwarding address state and its bits are formatted as follows:
  *
- *  |33|22|2222222211111111110000000000|
- *  |10|98|7654321098765432109876543210|
- *  |11| ForwardingAddress             |
+ *  |33|2|22222222211111111110000000000|
+ *  |10|9|87654321098765432109876543210|
+ *  |11|0| ForwardingAddress           |
  *
 * The r bit stores the read barrier state and the m bit stores the mark bit used by the GC.
  */
@@ -64,11 +64,13 @@
   enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
-    kReadBarrierStateSize = 2,
+    kReadBarrierStateSize = 1,
+    kMarkBitStateSize = 1,
     // Number of bits to encode the thin lock owner.
     kThinLockOwnerSize = 16,
     // Remaining bits are the recursive lock count.
-    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
+        kMarkBitStateSize,
     // Thin lock bits. Owner in lowest bits.
 
     kThinLockOwnerShift = 0,
@@ -81,25 +83,43 @@
     kThinLockCountOne = 1 << kThinLockCountShift,  // == 65536 (0x10000)
 
     // State in the highest bits.
-    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
+    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
+        kMarkBitStateSize,
     kStateMask = (1 << kStateSize) - 1,
     kStateMaskShifted = kStateMask << kStateShift,
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+
+    // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
     kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
     kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
+    // Mark bit.
+    kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
+    kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
+    kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
+    kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
+
+    // GC state is mark bit and read barrier state.
+    kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
+    kGCStateShift = kReadBarrierStateShift,
+    kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
+    kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
+
     // When the state is kHashCode, the non-state bits hold the hashcode.
     // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
-    kHashSize = 32 - kStateSize - kReadBarrierStateSize,
+    kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
     kHashMask = (1 << kHashSize) - 1,
     kMaxHash = kHashMask,
 
+    // Forwarding address shift.
+    kForwardingAddressShift = kObjectAlignmentShift,
+
     kMonitorIdShift = kHashShift,
     kMonitorIdSize = kHashSize,
     kMonitorIdMask = kHashMask,
@@ -108,31 +128,31 @@
     kMaxMonitorId = kMaxHash
   };
 
-  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
-                    (rb_state << kReadBarrierStateShift) |
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
+    return LockWord((thread_id << kThinLockOwnerShift) |
+                    (count << kThinLockCountShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
   }
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
   }
 
-  static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
+  static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
     return LockWord((hash_code << kHashShift) |
-                    (rb_state << kReadBarrierStateShift) |
+                    (gc_state << kGCStateShift) |
                     (kStateHash << kStateShift));
   }
 
-  static LockWord FromDefault(uint32_t rb_state) {
-    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
-    return LockWord(rb_state << kReadBarrierStateShift);
+  static LockWord FromDefault(uint32_t gc_state) {
+    return LockWord(gc_state << kGCStateShift);
   }
 
   static bool IsDefault(LockWord lw) {
@@ -154,7 +174,7 @@
   LockState GetState() const {
     CheckReadBarrierState();
     if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
-        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
+        (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -176,6 +196,10 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  uint32_t GCState() const {
+    return (value_ & kGCStateMaskShifted) >> kGCStateShift;
+  }
+
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
@@ -184,6 +208,19 @@
     value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
   }
 
+
+  uint32_t MarkBitState() const {
+    return (value_ >> kMarkBitStateShift) & kMarkBitStateMask;
+  }
+
+  void SetMarkBitState(uint32_t mark_bit) {
+    DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear and or the bits.
+    value_ &= kMarkBitStateMaskShiftedToggled;
+    value_ |= mark_bit << kMarkBitStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
@@ -197,7 +234,7 @@
   size_t ForwardingAddress() const;
 
   // Construct a lock word for inflation to use a Monitor.
-  LockWord(Monitor* mon, uint32_t rb_state);
+  LockWord(Monitor* mon, uint32_t gc_state);
 
   // Return the hash code stored in the lock word, must be kHashCode state.
   int32_t GetHashCode() const;
@@ -207,7 +244,7 @@
     if (kIncludeReadBarrierState) {
       return lw1.GetValue() == lw2.GetValue();
     }
-    return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
+    return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState();
   }
 
   void Dump(std::ostream& os) {
@@ -248,9 +285,9 @@
     return value_;
   }
 
-  uint32_t GetValueWithoutReadBarrierState() const {
+  uint32_t GetValueWithoutGCState() const {
     CheckReadBarrierState();
-    return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    return value_ & kGCStateMaskShiftedToggled;
   }
 
   // Only Object should be converting LockWords to/from uints.
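
Reading the new layout off the bit diagrams above (state in bits 31-30, mark bit in 29, read barrier bit in 28, the last two together forming the GC state), the field extraction can be sketched with standalone constants. These mirror the diagrams but are illustrative placeholders, not the ART enumerators.

#include <cassert>
#include <cstdint>

// Standalone constants mirroring the bit diagrams above; illustrative only.
constexpr uint32_t kSketchStateShift = 30;        // Bits 31-30: lock state.
constexpr uint32_t kSketchMarkBitShift = 29;      // Bit 29: mark bit ("m").
constexpr uint32_t kSketchReadBarrierShift = 28;  // Bit 28: read barrier bit ("r").
constexpr uint32_t kSketchGCStateShift = kSketchReadBarrierShift;
constexpr uint32_t kSketchGCStateMask = 0x3u << kSketchGCStateShift;  // m and r together.

constexpr uint32_t State(uint32_t word)       { return word >> kSketchStateShift; }
constexpr uint32_t MarkBit(uint32_t word)     { return (word >> kSketchMarkBitShift) & 1u; }
constexpr uint32_t ReadBarrier(uint32_t word) { return (word >> kSketchReadBarrierShift) & 1u; }
constexpr uint32_t GCState(uint32_t word)     { return (word & kSketchGCStateMask) >> kSketchGCStateShift; }

int main() {
  // A "fat" lock word (state 01) with the mark bit set and a small monitor id.
  const uint32_t word = (1u << kSketchStateShift) | (1u << kSketchMarkBitShift) | 0x1234u;
  assert(State(word) == 1u);
  assert(MarkBit(word) == 1u);
  assert(ReadBarrier(word) == 0u);
  assert(GCState(word) == 2u);  // Mark bit is the high bit of the two GC-state bits.
  return 0;
}
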
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 5d89c21..bb07fcb 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -25,12 +25,8 @@
 #include <sstream>
 
 #include "base/stringprintf.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
+#include "base/unix_file/fd_file.h"
+#include "os.h"
 #include "thread-inl.h"
 #include "utils.h"
 
@@ -157,6 +153,8 @@
     }
     return false;
   }
+
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)  // start of new within old
         && (end > it->start && end <= it->end)) {  // end of new within old
@@ -180,6 +178,7 @@
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)      // start of new within old
         || (end > it->start && end < it->end)        // end of new within old
@@ -298,12 +297,13 @@
     flags |= MAP_FIXED;
   }
 
-  ScopedFd fd(-1);
+  File fd;
 
   if (use_ashmem) {
     if (!kIsTargetBuild) {
-      // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
-      // fail due to ulimit restrictions. If they will then use a regular mmap.
+      // When not on Android (either on the host, or when targeting Linux), ashmem is faked
+      // using files in /tmp. Ensure that such files won't fail due to ulimit restrictions;
+      // if they would, fall back to a regular mmap.
       struct rlimit rlimit_fsize;
       CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
       use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
@@ -316,8 +316,9 @@
     // prefixed "dalvik-".
     std::string debug_friendly_name("dalvik-");
     debug_friendly_name += name;
-    fd.reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count));
-    if (fd.get() == -1) {
+    fd.Reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count),
+             /* check_usage */ false);
+    if (fd.Fd() == -1) {
       *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
       return nullptr;
     }
@@ -331,14 +332,16 @@
                              page_aligned_byte_count,
                              prot,
                              flags,
-                             fd.get(),
+                             fd.Fd(),
                              0,
                              low_4gb);
   saved_errno = errno;
 
   if (actual == MAP_FAILED) {
     if (error_msg != nullptr) {
-      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      if (kIsDebugBuild || VLOG_IS_ON(oat)) {
+        PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      }
 
       *error_msg = StringPrintf("Failed anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0): %s. "
                                     "See process maps in the log.",
@@ -346,7 +349,7 @@
                                 page_aligned_byte_count,
                                 prot,
                                 flags,
-                                fd.get(),
+                                fd.Fd(),
                                 strerror(saved_errno));
     }
     return nullptr;
@@ -552,7 +555,7 @@
       return nullptr;
     }
   }
-  ScopedFd fd(int_fd);
+  File fd(int_fd, /* check_usage */ false);
 
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Unmap/map the tail region.
@@ -568,12 +571,12 @@
   // region. Note this isn't perfect as there's no way to prevent
   // other threads to try to take this memory region here.
   uint8_t* actual = reinterpret_cast<uint8_t*>(mmap(tail_base_begin, tail_base_size, tail_prot,
-                                              flags, fd.get(), 0));
+                                              flags, fd.Fd(), 0));
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
                               "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags,
-                              fd.get());
+                              fd.Fd());
     return nullptr;
   }
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false);
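
The ulimit check described in the comment above (only fake ashmem with /tmp files when RLIMIT_FSIZE will not truncate them) could look roughly like the sketch below. This is an illustrative helper, not the ART code, and assumes byte_count is the size of the file that would be created.

#include <sys/resource.h>
#include <cstddef>
#include <cstdio>

// Decide whether a /tmp-backed fake ashmem region of `byte_count` bytes
// would survive the current RLIMIT_FSIZE limit.
bool CanUseTmpFileOfSize(size_t byte_count) {
  struct rlimit rlimit_fsize;
  if (getrlimit(RLIMIT_FSIZE, &rlimit_fsize) != 0) {
    return false;  // Be conservative if the limit cannot be read.
  }
  return rlimit_fsize.rlim_cur == RLIM_INFINITY || byte_count <= rlimit_fsize.rlim_cur;
}

int main() {
  std::printf("1 MiB /tmp file ok: %d\n", static_cast<int>(CanUseTmpFileOfSize(1u << 20)));
  return 0;
}
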
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 3eaf576..597f0d4 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -68,7 +68,7 @@
                               bool low_4gb,
                               bool reuse,
                               std::string* error_msg,
-                              bool use_ashmem = true);
+                              bool use_ashmem = !kIsTargetLinux);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -172,7 +172,7 @@
                      const char* tail_name,
                      int tail_prot,
                      std::string* error_msg,
-                     bool use_ashmem = true);
+                     bool use_ashmem = !kIsTargetLinux);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index fbb0441..f018c1f 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -178,9 +178,9 @@
   }
 
   // Is `address` aligned on a machine word?
-  template<typename T> static bool IsWordAligned(const T* address) {
+  template<typename T> static constexpr bool IsWordAligned(const T* address) {
     // Word alignment in bytes.
-    size_t kWordAlignment = GetInstructionSetPointerSize(kRuntimeISA);
+    size_t kWordAlignment = static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA));
     return IsAlignedParam(address, kWordAlignment);
   }
 
diff --git a/runtime/mirror/abstract_method.cc b/runtime/mirror/abstract_method.cc
index 5a07dee..b4dce58 100644
--- a/runtime/mirror/abstract_method.cc
+++ b/runtime/mirror/abstract_method.cc
@@ -21,12 +21,9 @@
 namespace art {
 namespace mirror {
 
-template <bool kTransactionActive>
+template <PointerSize kPointerSize, bool kTransactionActive>
 bool AbstractMethod::CreateFromArtMethod(ArtMethod* method) {
-  auto* interface_method = method->GetInterfaceMethodIfProxy(
-      kTransactionActive
-          ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-          : sizeof(void*));
+  auto* interface_method = method->GetInterfaceMethodIfProxy(kPointerSize);
   SetArtMethod<kTransactionActive>(method);
   SetFieldObject<kTransactionActive>(DeclaringClassOffset(), method->GetDeclaringClass());
   SetFieldObject<kTransactionActive>(
@@ -36,8 +33,14 @@
   return true;
 }
 
-template bool AbstractMethod::CreateFromArtMethod<false>(ArtMethod* method);
-template bool AbstractMethod::CreateFromArtMethod<true>(ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k32, false>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k32, true>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k64, false>(
+    ArtMethod* method);
+template bool AbstractMethod::CreateFromArtMethod<PointerSize::k64, true>(
+    ArtMethod* method);
 
 ArtMethod* AbstractMethod::GetArtMethod() {
   return reinterpret_cast<ArtMethod*>(GetField64(ArtMethodOffset()));
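
The hunk above switches CreateFromArtMethod to a compile-time PointerSize parameter and spells out one explicit instantiation per combination so other translation units can link against them. A minimal sketch of that pattern, with placeholder names rather than the ART types:

#include <cassert>

// Placeholder enum, not art::PointerSize; values are pointer widths in bytes.
enum class PointerSize { k32 = 4, k64 = 8 };

struct Method {
  template <PointerSize kPointerSize, bool kTransactionActive>
  bool CreateFrom(int value);  // Declared in the header, defined below.
};

template <PointerSize kPointerSize, bool kTransactionActive>
bool Method::CreateFrom(int value) {
  // The parameters select behavior at compile time; this body is only a stub.
  return kTransactionActive ? value >= 0 : value > 0;
}

// One explicit instantiation per supported combination.
template bool Method::CreateFrom<PointerSize::k32, false>(int);
template bool Method::CreateFrom<PointerSize::k32, true>(int);
template bool Method::CreateFrom<PointerSize::k64, false>(int);
template bool Method::CreateFrom<PointerSize::k64, true>(int);

int main() {
  Method m;
  assert((m.CreateFrom<PointerSize::k64, false>(1)));
  return 0;
}
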
diff --git a/runtime/mirror/abstract_method.h b/runtime/mirror/abstract_method.h
index a39f94d..cfbe492 100644
--- a/runtime/mirror/abstract_method.h
+++ b/runtime/mirror/abstract_method.h
@@ -34,7 +34,7 @@
 class MANAGED AbstractMethod : public AccessibleObject {
  public:
   // Called from Constructor::CreateFromArtMethod, Method::CreateFromArtMethod.
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive>
   bool CreateFromArtMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index c6fa15d..014e54b 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -30,7 +30,7 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Array::ClassSize(size_t pointer_size) {
+inline uint32_t Array::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
@@ -371,25 +371,23 @@
 }
 
 template<typename T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline T PointerArray::GetElementPtrSize(uint32_t idx, size_t ptr_size) {
+inline T PointerArray::GetElementPtrSize(uint32_t idx, PointerSize ptr_size) {
   // C style casts here since we sometimes have T be a pointer, or sometimes an integer
   // (for stack traces).
-  if (ptr_size == 8) {
+  if (ptr_size == PointerSize::k64) {
     return (T)static_cast<uintptr_t>(
         AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
-  DCHECK_EQ(ptr_size, 4u);
   return (T)static_cast<uintptr_t>(
       AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
 }
 
 template<bool kTransactionActive, bool kUnchecked>
-inline void PointerArray::SetElementPtrSize(uint32_t idx, uint64_t element, size_t ptr_size) {
-  if (ptr_size == 8) {
+inline void PointerArray::SetElementPtrSize(uint32_t idx, uint64_t element, PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
     (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this)) : AsLongArray())->
         SetWithoutChecks<kTransactionActive>(idx, element);
   } else {
-    DCHECK_EQ(ptr_size, 4u);
     DCHECK_LE(element, static_cast<uint64_t>(0xFFFFFFFFu));
     (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this)) : AsIntArray())
         ->SetWithoutChecks<kTransactionActive>(idx, static_cast<uint32_t>(element));
@@ -397,7 +395,7 @@
 }
 
 template<bool kTransactionActive, bool kUnchecked, typename T>
-inline void PointerArray::SetElementPtrSize(uint32_t idx, T* element, size_t ptr_size) {
+inline void PointerArray::SetElementPtrSize(uint32_t idx, T* element, PointerSize ptr_size) {
   SetElementPtrSize<kTransactionActive, kUnchecked>(idx,
                                                     reinterpret_cast<uintptr_t>(element),
                                                     ptr_size);
@@ -405,7 +403,7 @@
 
 template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void PointerArray::Fixup(mirror::PointerArray* dest,
-                                size_t pointer_size,
+                                PointerSize pointer_size,
                                 const Visitor& visitor) {
   for (size_t i = 0, count = GetLength(); i < count; ++i) {
     void* ptr = GetElementPtrSize<void*, kVerifyFlags, kReadBarrierOption>(i, pointer_size);
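
GetElementPtrSize/SetElementPtrSize above store pointers (or pointer-sized integers) in either a 32-bit or a 64-bit backing array depending on the image pointer size, asserting that narrow stores actually fit. A simplified standalone sketch of that idea follows; unlike the real PointerArray, which reinterprets a single array as IntArray or LongArray, this uses two separate vectors purely for illustration.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for art::PointerSize; values are the pointer widths in bytes.
enum class PointerSize { k32 = 4, k64 = 8 };

class PointerSlots {
 public:
  explicit PointerSlots(size_t count) : narrow_(count, 0u), wide_(count, 0u) {}

  void Set(size_t idx, uint64_t element, PointerSize ptr_size) {
    if (ptr_size == PointerSize::k64) {
      wide_[idx] = element;
    } else {
      assert(element <= UINT32_MAX);  // Must fit in a 32-bit slot.
      narrow_[idx] = static_cast<uint32_t>(element);
    }
  }

  uint64_t Get(size_t idx, PointerSize ptr_size) const {
    return ptr_size == PointerSize::k64 ? wide_[idx]
                                        : static_cast<uint64_t>(narrow_[idx]);
  }

 private:
  std::vector<uint32_t> narrow_;
  std::vector<uint64_t> wide_;
};

int main() {
  PointerSlots slots(4);
  slots.Set(1, 0xCAFEu, PointerSize::k32);
  slots.Set(2, 0x1234567890ull, PointerSize::k64);
  assert(slots.Get(1, PointerSize::k32) == 0xCAFEu);
  assert(slots.Get(2, PointerSize::k64) == 0x1234567890ull);
  return 0;
}
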
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 9a21ec2..ec10a43 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_ARRAY_H_
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
+#include "base/enums.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
 #include "object.h"
@@ -31,7 +32,7 @@
 class MANAGED Array : public Object {
  public:
   // The size of a java.lang.Class representing an array.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Allocates an array with the given properties, if kFillUsable is true the array will be of at
   // least component_count size, however, if there's usable space at the end of the allocation the
@@ -162,9 +163,10 @@
     array_class_ = GcRoot<Class>(array_class);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetArrayClass() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(!array_class_.IsNull());
-    return array_class_.Read();
+    return array_class_.Read<kReadBarrierOption>();
   }
 
   static void ResetArrayClass() {
@@ -186,14 +188,14 @@
   template<typename T,
            VerifyObjectFlags kVerifyFlags = kVerifyNone,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  T GetElementPtrSize(uint32_t idx, size_t ptr_size)
+  T GetElementPtrSize(uint32_t idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<bool kTransactionActive = false, bool kUnchecked = false>
-  void SetElementPtrSize(uint32_t idx, uint64_t element, size_t ptr_size)
+  void SetElementPtrSize(uint32_t idx, uint64_t element, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   template<bool kTransactionActive = false, bool kUnchecked = false, typename T>
-  void SetElementPtrSize(uint32_t idx, T* element, size_t ptr_size)
+  void SetElementPtrSize(uint32_t idx, T* element, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Fixup the pointers in the dest arrays by passing our pointers through the visitor. Only copies
@@ -201,7 +203,7 @@
   template <VerifyObjectFlags kVerifyFlags = kVerifyNone,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void Fixup(mirror::PointerArray* dest, size_t pointer_size, const Visitor& visitor)
+  void Fixup(mirror::PointerArray* dest, PointerSize pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 };
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 0d95bb1..0f2aac2 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -26,7 +26,6 @@
 #include "base/length_prefixed_array.h"
 #include "class_loader.h"
 #include "common_throws.h"
-#include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
@@ -80,13 +79,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetDirectMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
   return GetDirectMethodsSliceUnchecked(pointer_size);
 }
 
-inline ArraySlice<ArtMethod> Class::GetDirectMethodsSliceUnchecked(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSliceUnchecked(PointerSize pointer_size) {
   return ArraySlice<ArtMethod>(GetMethodsPtr(),
                                GetDirectMethodsStartOffset(),
                                GetVirtualMethodsStartOffset(),
@@ -95,13 +93,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
   return GetDeclaredMethodsSliceUnchecked(pointer_size);
 }
 
-inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSliceUnchecked(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSliceUnchecked(PointerSize pointer_size) {
   return ArraySlice<ArtMethod>(GetMethodsPtr(),
                                GetDirectMethodsStartOffset(),
                                GetCopiedMethodsStartOffset(),
@@ -109,13 +106,13 @@
                                ArtMethod::Alignment(pointer_size));
 }
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
   return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size);
 }
 
-inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSliceUnchecked(
+    PointerSize pointer_size) {
   return ArraySlice<ArtMethod>(GetMethodsPtr(),
                                GetVirtualMethodsStartOffset(),
                                GetCopiedMethodsStartOffset(),
@@ -124,13 +121,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
   return GetVirtualMethodsSliceUnchecked(pointer_size);
 }
 
-inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSliceUnchecked(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSliceUnchecked(PointerSize pointer_size) {
   LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
   return ArraySlice<ArtMethod>(methods,
                                GetVirtualMethodsStartOffset(),
@@ -140,13 +136,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
   return GetCopiedMethodsSliceUnchecked(pointer_size);
 }
 
-inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSliceUnchecked(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSliceUnchecked(PointerSize pointer_size) {
   LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
   return ArraySlice<ArtMethod>(methods,
                                GetCopiedMethodsStartOffset(),
@@ -161,7 +156,7 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArraySlice<ArtMethod> Class::GetMethodsSlice(size_t pointer_size) {
+inline ArraySlice<ArtMethod> Class::GetMethodsSlice(PointerSize pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
   LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
   return ArraySlice<ArtMethod>(methods,
@@ -177,12 +172,12 @@
   return (methods == nullptr) ? 0 : methods->size();
 }
 
-inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return &GetDirectMethodsSliceUnchecked(pointer_size).At(i);
 }
 
-inline ArtMethod* Class::GetDirectMethod(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetDirectMethod(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return &GetDirectMethodsSlice(pointer_size).At(i);
 }
@@ -212,20 +207,20 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline ArtMethod* Class::GetVirtualMethod(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVirtualMethod(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>())
       << PrettyClass(this) << " status=" << GetStatus();
   return GetVirtualMethodUnchecked(i, pointer_size);
 }
 
-inline ArtMethod* Class::GetVirtualMethodDuringLinking(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVirtualMethodDuringLinking(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   DCHECK(IsLoaded() || IsErroneous());
   return GetVirtualMethodUnchecked(i, pointer_size);
 }
 
-inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return &GetVirtualMethodsSliceUnchecked(pointer_size).At(i);
 }
@@ -258,7 +253,7 @@
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
-inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetVTableEntry(uint32_t i, PointerSize pointer_size) {
   if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
@@ -275,29 +270,29 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
-inline ImTable* Class::GetImt(size_t pointer_size) {
+inline ImTable* Class::GetImt(PointerSize pointer_size) {
   return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
 }
 
-inline void Class::SetImt(ImTable* imt, size_t pointer_size) {
+inline void Class::SetImt(ImTable* imt, PointerSize pointer_size) {
   return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
 }
 
-inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
+inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, PointerSize pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
 }
 
-inline ArtMethod* Class::GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size) {
+inline ArtMethod* Class::GetEmbeddedVTableEntry(uint32_t i, PointerSize pointer_size) {
   return GetFieldPtrWithSize<ArtMethod*>(EmbeddedVTableEntryOffset(i, pointer_size), pointer_size);
 }
 
 inline void Class::SetEmbeddedVTableEntryUnchecked(
-    uint32_t i, ArtMethod* method, size_t pointer_size) {
+    uint32_t i, ArtMethod* method, PointerSize pointer_size) {
   SetFieldPtrWithSize<false>(EmbeddedVTableEntryOffset(i, pointer_size), method, pointer_size);
 }
 
-inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
+inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, PointerSize pointer_size) {
   auto* vtable = GetVTableDuringLinking();
   CHECK_EQ(method, vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size));
   SetEmbeddedVTableEntryUnchecked(i, method, pointer_size);
@@ -453,10 +448,12 @@
   return false;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method,
+                                                       PointerSize pointer_size) {
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != nullptr) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  DCHECK(!method->IsCopied());
   // TODO cache to improve lookup speed
   const int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
@@ -469,7 +466,7 @@
   return nullptr;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method, PointerSize pointer_size) {
   // Only miranda or default methods may come from interfaces and be used as a virtual.
   DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsDefault() || method->IsMiranda());
   // The argument method may from a super class.
@@ -477,13 +474,13 @@
   return GetVTableEntry(method->GetMethodIndex(), pointer_size);
 }
 
-inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method, size_t pointer_size) {
+inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method, PointerSize pointer_size) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
   return GetSuperClass()->GetVTableEntry(method->GetMethodIndex(), pointer_size);
 }
 
 inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method,
-                                                                size_t pointer_size) {
+                                                                PointerSize pointer_size) {
   if (method->IsDirect()) {
     return method;
   }
@@ -521,13 +518,13 @@
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
   Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
-      ? MemberOffset(RoundUp(super_class->GetObjectSize(),
+      ? MemberOffset(RoundUp(super_class->GetObjectSize<kVerifyFlags, kReadBarrierOption>(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
       : ClassOffset();
 }
 
 template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(PointerSize pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
   if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
@@ -538,7 +535,8 @@
   return MemberOffset(base);
 }
 
-inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(
+    PointerSize pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
   if (ShouldHaveEmbeddedVTable()) {
@@ -637,8 +635,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
-  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask);
+  DCHECK_EQ(static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift),
+            Primitive::ComponentSizeShift(type));
   return type;
 }
 
@@ -647,8 +646,9 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  size_t size_shift = static_cast<Primitive::Type>(v32 >> 16);
-  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
+  size_t size_shift = static_cast<Primitive::Type>(v32 >> kPrimitiveTypeSizeShiftShift);
+  DCHECK_EQ(size_shift,
+            Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask)));
   return size_shift;
 }
 
@@ -707,13 +707,13 @@
                                         uint32_t num_32bit_static_fields,
                                         uint32_t num_64bit_static_fields,
                                         uint32_t num_ref_static_fields,
-                                        size_t pointer_size) {
+                                        PointerSize pointer_size) {
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
   if (has_embedded_vtable) {
-    size = RoundUp(size + sizeof(uint32_t), pointer_size);
-    size += pointer_size;  // size of pointer to IMT
+    size = RoundUp(size + sizeof(uint32_t), static_cast<size_t>(pointer_size));
+    size += static_cast<size_t>(pointer_size);  // size of pointer to IMT
     size += num_vtable_entries * VTableEntrySize(pointer_size);
   }
 
@@ -766,7 +766,8 @@
   }
   if (kVisitNativeRoots) {
     // Since this class is reachable, we must also visit the associated roots when we scan it.
-    VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    VisitNativeRoots<kReadBarrierOption>(
+        visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
   }
 }
 
@@ -897,64 +898,63 @@
   }
 }
 
-inline void Class::SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings) {
+inline void Class::SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings) {
   SetFieldPtr<false>(DexCacheStringsOffset(), new_dex_cache_strings);
 }
 
-inline GcRoot<String>* Class::GetDexCacheStrings() {
-  return GetFieldPtr<GcRoot<String>*>(DexCacheStringsOffset());
+inline StringDexCacheType* Class::GetDexCacheStrings() {
+  return GetFieldPtr64<StringDexCacheType*>(DexCacheStringsOffset());
 }
 
-template<class Visitor>
-void mirror::Class::VisitNativeRoots(Visitor& visitor, size_t pointer_size) {
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
+void mirror::Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
   for (ArtField& field : GetSFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
   for (ArtField& field : GetIFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
   for (ArtMethod& method : GetMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
+    method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
   }
 }
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return GetDirectMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredMethods(
-      size_t pointer_size) {
-  CheckPointerSize(pointer_size);
+      PointerSize pointer_size) {
   return GetDeclaredMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredVirtualMethods(
-      size_t pointer_size) {
-  CheckPointerSize(pointer_size);
+      PointerSize pointer_size) {
   return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(
+    PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return GetVirtualMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetCopiedMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetCopiedMethods(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return GetCopiedMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
 
-inline IterationRange<StrideIterator<ArtMethod>> Class::GetMethods(size_t pointer_size) {
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetMethods(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return MakeIterationRangeFromLengthPrefixedArray(GetMethodsPtr(),
                                                    ArtMethod::Size(pointer_size),
@@ -977,13 +977,12 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
-inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
+inline MemberOffset Class::EmbeddedVTableOffset(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
-  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + pointer_size);
+  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + static_cast<size_t>(pointer_size));
 }
 
-inline void Class::CheckPointerSize(size_t pointer_size) {
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+inline void Class::CheckPointerSize(PointerSize pointer_size) {
   DCHECK_EQ(pointer_size, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
 }
 
@@ -1038,7 +1037,7 @@
 
 template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Class::FixupNativePointers(mirror::Class* dest,
-                                       size_t pointer_size,
+                                       PointerSize pointer_size,
                                        const Visitor& visitor) {
   // Update the field arrays.
   LengthPrefixedArray<ArtField>* const sfields = GetSFieldsPtr();
@@ -1058,8 +1057,8 @@
     dest->SetMethodsPtrInternal(new_methods);
   }
   // Update dex cache strings.
-  GcRoot<mirror::String>* strings = GetDexCacheStrings();
-  GcRoot<mirror::String>* new_strings = visitor(strings);
+  StringDexCacheType* strings = GetDexCacheStrings();
+  StringDexCacheType* new_strings = visitor(strings);
   if (strings != new_strings) {
     dest->SetDexCacheStrings(new_strings);
   }
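
The class-inl.h hunk above replaces the literals 0xFFFF and 16 in GetPrimitiveType/GetPrimitiveTypeSizeShift with named constants. The packing they describe (type in the low 16 bits, component-size shift in the high 16 bits of primitive_type_) can be sketched standalone as below, with the values inferred from the replaced literals rather than taken from the ART header.

#include <cstdint>

// Illustrative constants; values inferred from the replaced literals.
constexpr uint32_t kSketchSizeShiftShift = 16;
constexpr uint32_t kSketchTypeMask = (1u << kSketchSizeShiftShift) - 1;  // 0xFFFF

constexpr uint32_t Pack(uint32_t type, uint32_t size_shift) {
  return (size_shift << kSketchSizeShiftShift) | (type & kSketchTypeMask);
}
constexpr uint32_t UnpackType(uint32_t v32) { return v32 & kSketchTypeMask; }
constexpr uint32_t UnpackSizeShift(uint32_t v32) { return v32 >> kSketchSizeShiftShift; }

static_assert(UnpackType(Pack(5, 2)) == 5, "type round-trips");
static_assert(UnpackSizeShift(Pack(5, 2)) == 2, "size shift round-trips");
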
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..f948be7 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -334,8 +334,9 @@
   }
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name,
+                                      const StringPiece& signature,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature, pointer_size);
   if (method != nullptr) {
@@ -353,8 +354,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name,
+                                      const Signature& signature,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature, pointer_size);
   if (method != nullptr) {
@@ -372,8 +374,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                      size_t pointer_size) {
+ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache,
+                                      uint32_t dex_method_idx,
+                                      PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx, pointer_size);
   if (method != nullptr) {
@@ -392,8 +395,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name,
+                                           const StringPiece& signature,
+                                           PointerSize pointer_size) {
   for (auto& method : GetDirectMethods(pointer_size)) {
     if (name == method.GetName() && method.GetSignature() == signature) {
       return &method;
@@ -402,8 +406,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name,
+                                           const Signature& signature,
+                                           PointerSize pointer_size) {
   for (auto& method : GetDirectMethods(pointer_size)) {
     if (name == method.GetName() && signature == method.GetSignature()) {
       return &method;
@@ -412,8 +417,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                           size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache,
+                                           uint32_t dex_method_idx,
+                                           PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
     for (auto& method : GetDirectMethods(pointer_size)) {
       if (method.GetDexMethodIndex() == dex_method_idx) {
@@ -424,8 +430,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                   size_t pointer_size) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name,
+                                   const StringPiece& signature,
+                                   PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -435,8 +442,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature,
-                                   size_t pointer_size) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name,
+                                   const Signature& signature,
+                                   PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -447,7 +455,7 @@
 }
 
 ArtMethod* Class::FindDirectMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, size_t pointer_size) {
+    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
@@ -457,7 +465,8 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethodByName(const StringPiece& name, size_t pointer_size) {
+ArtMethod* Class::FindDeclaredDirectMethodByName(const StringPiece& name,
+                                                 PointerSize pointer_size) {
   for (auto& method : GetDirectMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName()) {
@@ -471,8 +480,9 @@
 // because they not only find 'declared' methods but may also return copied methods. This behavior
 // is desired and correct, but the naming can lead to confusion because, in the Java language,
 // 'declared' excludes interface methods, which these functions may still find.
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
+                                            const StringPiece& signature,
+                                            PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName() && np_method->GetSignature() == signature) {
@@ -482,8 +492,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
+                                            const Signature& signature,
+                                            PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName() && signature == np_method->GetSignature()) {
@@ -493,8 +504,9 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                            size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache,
+                                            uint32_t dex_method_idx,
+                                            PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
     for (auto& method : GetDeclaredVirtualMethods(pointer_size)) {
       if (method.GetDexMethodIndex() == dex_method_idx) {
@@ -505,7 +517,8 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethodByName(const StringPiece& name, size_t pointer_size) {
+ArtMethod* Class::FindDeclaredVirtualMethodByName(const StringPiece& name,
+                                                  PointerSize pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
     ArtMethod* const np_method = method.GetInterfaceMethodIfProxy(pointer_size);
     if (name == np_method->GetName()) {
@@ -516,7 +529,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const StringPiece& signature, size_t pointer_size) {
+    const StringPiece& name, const StringPiece& signature, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -527,7 +540,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const Signature& signature, size_t pointer_size) {
+    const StringPiece& name, const Signature& signature, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -538,7 +551,7 @@
 }
 
 ArtMethod* Class::FindVirtualMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, size_t pointer_size) {
+    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
   for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
@@ -548,7 +561,7 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindVirtualMethodForInterfaceSuper(ArtMethod* method, size_t pointer_size) {
+ArtMethod* Class::FindVirtualMethodForInterfaceSuper(ArtMethod* method, PointerSize pointer_size) {
   DCHECK(method->GetDeclaringClass()->IsInterface());
   DCHECK(IsInterface()) << "Should only be called on an interface class";
   // Check if we have one defined on this interface first. This includes searching copied ones to
@@ -613,7 +626,7 @@
   return abstract_methods.empty() ? nullptr : abstract_methods[0];
 }
 
-ArtMethod* Class::FindClassInitializer(size_t pointer_size) {
+ArtMethod* Class::FindClassInitializer(PointerSize pointer_size) {
   for (ArtMethod& method : GetDirectMethods(pointer_size)) {
     if (method.IsClassInitializer()) {
       DCHECK_STREQ(method.GetName(), "<clinit>");
@@ -748,21 +761,24 @@
   return nullptr;
 }
 
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ArtField* Class::FindStaticField(Thread* self,
+                                 Class* klass,
+                                 const DexCache* dex_cache,
                                  uint32_t dex_field_idx) {
-  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+  for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
     if (f != nullptr) {
       return f;
     }
-    // Wrap k incase it moves during GetDirectInterface.
+    // Though GetDirectInterface() should not cause thread suspension when called
+    // from here, it takes a Handle as an argument, so we need to wrap `k`.
+    ScopedAssertNoThreadSuspension ants(self, __FUNCTION__);
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    Handle<mirror::Class> h_k(hs.NewHandle(k));
     // Is this field in any of this class' interfaces?
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+      mirror::Class* interface = GetDirectInterface(self, h_k, i);
       f = FindStaticField(self, interface, dex_cache, dex_field_idx);
       if (f != nullptr) {
         return f;
@@ -800,7 +816,7 @@
   return nullptr;
 }
 
-void Class::SetSkipAccessChecksFlagOnAllMethods(size_t pointer_size) {
+void Class::SetSkipAccessChecksFlagOnAllMethods(PointerSize pointer_size) {
   DCHECK(IsVerified());
   for (auto& m : GetMethods(pointer_size)) {
     if (!m.IsNative() && m.IsInvokable()) {
@@ -914,7 +930,7 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedVTable(size_t pointer_size) {
+void Class::PopulateEmbeddedVTable(PointerSize pointer_size) {
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -960,9 +976,12 @@
 // The pre-fence visitor for Class::CopyOf().
 class CopyClassVisitor {
  public:
-  CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ImTable* imt,
-                   size_t pointer_size)
+  CopyClassVisitor(Thread* self,
+                   Handle<mirror::Class>* orig,
+                   size_t new_length,
+                   size_t copy_bytes,
+                   ImTable* imt,
+                   PointerSize pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
   }
@@ -988,12 +1007,11 @@
   const size_t new_length_;
   const size_t copy_bytes_;
   ImTable* imt_;
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
-Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ImTable* imt, size_t pointer_size) {
+Class* Class::CopyOf(Thread* self, int32_t new_length, ImTable* imt, PointerSize pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
@@ -1019,14 +1037,14 @@
 
 // TODO: Move this to java_lang_Class.cc?
 ArtMethod* Class::GetDeclaredConstructor(
-    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size) {
+    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size) {
   for (auto& m : GetDirectMethods(pointer_size)) {
     // Skip <clinit> which is a static constructor, as well as non constructors.
     if (m.IsStatic() || !m.IsConstructor()) {
       continue;
     }
     // May cause thread suspension and exceptions.
-    if (m.GetInterfaceMethodIfProxy(sizeof(void*))->EqualParameters(args)) {
+    if (m.GetInterfaceMethodIfProxy(kRuntimePointerSize)->EqualParameters(args)) {
       return &m;
     }
     if (UNLIKELY(self->IsExceptionPending())) {
@@ -1050,7 +1068,7 @@
   return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
 }
 
-template <bool kTransactionActive>
+template <PointerSize kPointerSize, bool kTransactionActive>
 mirror::Method* Class::GetDeclaredMethodInternal(Thread* self,
                                                  mirror::Class* klass,
                                                  mirror::String* name,
@@ -1071,11 +1089,8 @@
   auto h_args = hs.NewHandle(args);
   Handle<mirror::Class> h_klass = hs.NewHandle(klass);
   ArtMethod* result = nullptr;
-  const size_t pointer_size = kTransactionActive
-                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-                                  : sizeof(void*);
-  for (auto& m : h_klass->GetDeclaredVirtualMethods(pointer_size)) {
-    auto* np_method = m.GetInterfaceMethodIfProxy(pointer_size);
+  for (auto& m : h_klass->GetDeclaredVirtualMethods(kPointerSize)) {
+    auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
     // May cause thread suspension.
     mirror::String* np_name = np_method->GetNameAsString(self);
     if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
@@ -1086,19 +1101,19 @@
     }
     auto modifiers = m.GetAccessFlags();
     if ((modifiers & kSkipModifiers) == 0) {
-      return mirror::Method::CreateFromArtMethod<kTransactionActive>(self, &m);
+      return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
     }
     if ((modifiers & kAccMiranda) == 0) {
       result = &m;  // Remember as potential result if it's not a miranda method.
     }
   }
   if (result == nullptr) {
-    for (auto& m : h_klass->GetDirectMethods(pointer_size)) {
+    for (auto& m : h_klass->GetDirectMethods(kPointerSize)) {
       auto modifiers = m.GetAccessFlags();
       if ((modifiers & kAccConstructor) != 0) {
         continue;
       }
-      auto* np_method = m.GetInterfaceMethodIfProxy(pointer_size);
+      auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
       // May cause thread suspension.
       mirror::String* np_name = np_method->GetNameAsString(self);
       if (np_name == nullptr) {
@@ -1112,50 +1127,73 @@
         continue;
       }
       if ((modifiers & kSkipModifiers) == 0) {
-        return mirror::Method::CreateFromArtMethod<kTransactionActive>(self, &m);
+        return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
       }
       // Direct methods cannot be miranda methods, so this potential result must be synthetic.
       result = &m;
     }
   }
   return result != nullptr
-      ? mirror::Method::CreateFromArtMethod<kTransactionActive>(self, result)
+      ? mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
 template
-mirror::Method* Class::GetDeclaredMethodInternal<false>(Thread* self,
-                                                        mirror::Class* klass,
-                                                        mirror::String* name,
-                                                        mirror::ObjectArray<mirror::Class>* args);
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
 template
-mirror::Method* Class::GetDeclaredMethodInternal<true>(Thread* self,
-                                                       mirror::Class* klass,
-                                                       mirror::String* name,
-                                                       mirror::ObjectArray<mirror::Class>* args);
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::String* name,
+    mirror::ObjectArray<mirror::Class>* args);
 
-template <bool kTransactionActive>
+template <PointerSize kPointerSize, bool kTransactionActive>
 mirror::Constructor* Class::GetDeclaredConstructorInternal(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args) {
   StackHandleScope<1> hs(self);
-  const size_t pointer_size = kTransactionActive
-                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
-                                  : sizeof(void*);
-  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), pointer_size);
+  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), kPointerSize);
   return result != nullptr
-      ? mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+      ? mirror::Constructor::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
 
-template mirror::Constructor* Class::GetDeclaredConstructorInternal<false>(
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, false>(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args);
-template mirror::Constructor* Class::GetDeclaredConstructorInternal<true>(
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template
+mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, true>(
     Thread* self,
     mirror::Class* klass,
     mirror::ObjectArray<mirror::Class>* args);
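
Note: with PointerSize now a template parameter, callers of GetDeclaredMethodInternal must pick
an instantiation at compile time. A minimal sketch of such a dispatch, assuming the caller knows
the runtime pointer size; the helper name below is hypothetical and not part of this change:

// Hypothetical dispatch helper (illustration only).
static mirror::Method* DispatchGetDeclaredMethod(Thread* self,
                                                 mirror::Class* klass,
                                                 mirror::String* name,
                                                 mirror::ObjectArray<mirror::Class>* args,
                                                 PointerSize pointer_size,
                                                 bool transaction_active) {
  if (pointer_size == PointerSize::k64) {
    return transaction_active
        ? mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, true>(self, klass, name, args)
        : mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, false>(self, klass, name, args);
  }
  return transaction_active
      ? mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, true>(self, klass, name, args)
      : mirror::Class::GetDeclaredMethodInternal<PointerSize::k32, false>(self, klass, name, args);
}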
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index c1504de..e2cd649 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "base/enums.h"
 #include "base/iteration_range.h"
 #include "dex_file.h"
 #include "class_flags.h"
@@ -53,6 +54,9 @@
 class DexCache;
 class IfTable;
 class Method;
+struct StringDexCachePair;
+
+using StringDexCacheType = std::atomic<mirror::StringDexCachePair>;
 
 // C++ mirror of java.lang.Class
 class MANAGED Class FINAL : public Object {
@@ -63,6 +67,12 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
+  // Shift primitive type by kPrimitiveTypeSizeShiftShift to get the component type size shift
+  // Used for computing array size as follows:
+  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
+  static constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
+  static constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;
+
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -370,10 +380,10 @@
 
   void SetPrimitiveType(Primitive::Type new_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    int32_t v32 = static_cast<int32_t>(new_type);
-    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
+    uint32_t v32 = static_cast<uint32_t>(new_type);
+    DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
-    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
+    v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
 
@@ -471,7 +481,7 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return 1U << GetComponentSizeShift();
+    return 1U << GetComponentSizeShift<kReadBarrierOption>();
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -556,17 +566,17 @@
                                    uint32_t num_32bit_static_fields,
                                    uint32_t num_64bit_static_fields,
                                    uint32_t num_ref_static_fields,
-                                   size_t pointer_size);
+                                   PointerSize pointer_size);
 
   // The size of java.lang.Class.class.
-  static uint32_t ClassClassSize(size_t pointer_size) {
+  static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 72;
+    uint32_t vtable_entries = Object::kVTableLength + 70;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
   // The size of a java.lang.Class representing a primitive such as int.class.
-  static uint32_t PrimitiveClassSize(size_t pointer_size) {
+  static uint32_t PrimitiveClassSize(PointerSize pointer_size) {
     return ComputeClassSize(false, 0, 0, 0, 0, 0, 0, pointer_size);
   }
 
@@ -665,7 +675,7 @@
   // `This` and `klass` must be classes.
   Class* GetCommonSuperClass(Handle<Class> klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetSuperClass(Class *new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetSuperClass(Class* new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
     Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
     DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
@@ -703,7 +713,7 @@
   // Also updates the dex_cache_strings_ variable from new_dex_cache.
   void SetDexCache(DexCache* new_dex_cache) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetMethodsPtr()
@@ -713,7 +723,7 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, methods_));
   }
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
@@ -727,65 +737,66 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Use only when we are allocating and populating the method arrays.
-  ALWAYS_INLINE ArtMethod* GetDirectMethodUnchecked(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetDirectMethodUnchecked(size_t i, PointerSize pointer_size)
         SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE ArtMethod* GetVirtualMethodUnchecked(size_t i, size_t pointer_size)
+  ALWAYS_INLINE ArtMethod* GetVirtualMethodUnchecked(size_t i, PointerSize pointer_size)
         SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of static, private, and constructor methods.
   ALWAYS_INLINE uint32_t NumDirectMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredMethods(
-        size_t pointer_size)
+        PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Method* GetDeclaredMethodInternal(Thread* self,
                                            mirror::Class* klass,
                                            mirror::String* name,
                                            mirror::ObjectArray<mirror::Class>* args)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Constructor* GetDeclaredConstructorInternal(Thread* self,
                                                      mirror::Class* klass,
                                                      mirror::ObjectArray<mirror::Class>* args)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredVirtualMethods(
-        size_t pointer_size)
+        PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetCopiedMethods(size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetCopiedMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSlice(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSlice(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(size_t pointer_size)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(
+      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of non-inherited virtual methods (sum of declared and copied methods).
@@ -800,10 +811,10 @@
   ALWAYS_INLINE uint32_t NumMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ArtMethod* GetVirtualMethod(size_t i, size_t pointer_size)
+  ArtMethod* GetVirtualMethod(size_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethodDuringLinking(size_t i, size_t pointer_size)
+  ArtMethod* GetVirtualMethodDuringLinking(size_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -822,9 +833,10 @@
     return MemberOffset(sizeof(Class));
   }
 
-  static MemberOffset ImtPtrOffset(size_t pointer_size) {
+  static MemberOffset ImtPtrOffset(PointerSize pointer_size) {
     return MemberOffset(
-        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t),
+                static_cast<size_t>(pointer_size)));
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -841,124 +853,126 @@
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
+  static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, PointerSize pointer_size);
 
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
+  ArtMethod* GetVTableEntry(uint32_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   int32_t GetEmbeddedVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ImTable* GetImt(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  ImTable* GetImt(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetImt(ImTable* imt, size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  void SetImt(ImTable* imt, PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
+  ArtMethod* GetEmbeddedVTableEntry(uint32_t i, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
+  void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
+  inline void SetEmbeddedVTableEntryUnchecked(uint32_t i,
+                                              ArtMethod* method,
+                                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedVTable(size_t pointer_size)
+  void PopulateEmbeddedVTable(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
-  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class' super class, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method from some implementor of this interface, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForInterfaceSuper(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForInterfaceSuper(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE;
 
-  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method, size_t pointer_size)
+  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                 size_t pointer_size)
+                                 PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                      size_t pointer_size)
+                                      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                              size_t pointer_size)
+                              PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                                       size_t pointer_size)
+                                       PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, size_t pointer_size)
+  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name, size_t pointer_size)
+  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
-                               size_t pointer_size)
+                               PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ArtMethod* FindClassInitializer(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+  ArtMethod* FindClassInitializer(PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool HasDefaultMethods() SHARED_REQUIRES(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccHasDefaultMethod) != 0;
@@ -1040,11 +1054,11 @@
   // Get the offset of the first reference static field. Other reference static fields follow.
   template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  MemberOffset GetFirstReferenceStaticFieldOffset(size_t pointer_size)
+  MemberOffset GetFirstReferenceStaticFieldOffset(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the offset of the first reference static field. Other reference static fields follow.
-  MemberOffset GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size)
+  MemberOffset GetFirstReferenceStaticFieldOffsetDuringLinking(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Gets the static fields of the class.
@@ -1091,7 +1105,9 @@
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
-  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+  static ArtField* FindStaticField(Thread* self,
+                                   Class* klass,
+                                   const DexCache* dex_cache,
                                    uint32_t dex_field_idx)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1151,12 +1167,12 @@
 
   // Visit native roots visits roots which are keyed off the native pointers such as ArtFields and
   // ArtMethods.
-  template<class Visitor>
-  void VisitNativeRoots(Visitor& visitor, size_t pointer_size)
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, class Visitor>
+  void VisitNativeRoots(Visitor& visitor, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // When class is verified, set the kAccSkipAccessChecks flag on each method.
-  void SetSkipAccessChecksFlagOnAllMethods(size_t pointer_size)
+  void SetSkipAccessChecksFlagOnAllMethods(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the descriptor of the class. In a few cases a std::string is required, rather than
@@ -1191,7 +1207,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
-                size_t pointer_size)
+                PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // For proxy class only.
@@ -1206,8 +1222,8 @@
   bool GetSlowPathEnabled() SHARED_REQUIRES(Locks::mutator_lock_);
   void SetSlowPath(bool enabled) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  GcRoot<String>* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_);
-  void SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings)
+  StringDexCacheType* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_);
+  void SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static MemberOffset DexCacheStringsOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_strings_);
@@ -1215,7 +1231,7 @@
 
   // May cause thread suspension due to EqualParameters.
   ArtMethod* GetDeclaredConstructor(
-      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size)
+      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static int32_t GetInnerClassFlags(Handle<Class> h_this, int32_t default_value)
@@ -1242,27 +1258,28 @@
     return GetClassLoader() == nullptr;
   }
 
-  static size_t ImTableEntrySize(size_t pointer_size) {
-    return pointer_size;
+  static size_t ImTableEntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
-  static size_t VTableEntrySize(size_t pointer_size) {
-    return pointer_size;
+  static size_t VTableEntrySize(PointerSize pointer_size) {
+    return static_cast<size_t>(pointer_size);
   }
 
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSliceUnchecked(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSliceUnchecked(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSliceUnchecked(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSliceUnchecked(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSliceUnchecked(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSliceUnchecked(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSliceUnchecked(
+      PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(size_t pointer_size)
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Fix up all of the native pointers in the class by running them through the visitor. Only sets
@@ -1272,7 +1289,7 @@
   template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void FixupNativePointers(mirror::Class* dest, size_t pointer_size, const Visitor& visitor)
+  void FixupNativePointers(mirror::Class* dest, PointerSize pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
@@ -1316,8 +1333,9 @@
   bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check that the pointer size matches the one in the class linker.
-  ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
-  static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
+  ALWAYS_INLINE static void CheckPointerSize(PointerSize pointer_size);
+
+  static MemberOffset EmbeddedVTableOffset(PointerSize pointer_size);
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
@@ -1328,8 +1346,6 @@
   // 'Class' Object Fields
   // Order governed by java field ordering. See art::ClassLinker::LinkFields.
 
-  HeapReference<Object> annotation_type_;
-
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1375,9 +1391,6 @@
   // virtual_ methods_ for miranda methods.
   HeapReference<PointerArray> vtable_;
 
-  // Access flags; low 16 bits are defined by VM spec.
-  uint32_t access_flags_;
-
   // Short cuts to dex_cache_ member for fast compiled code access.
   uint64_t dex_cache_strings_;
 
@@ -1412,6 +1425,9 @@
   // Static fields length-prefixed array.
   uint64_t sfields_;
 
+  // Access flags; low 16 bits are defined by VM spec.
+  uint32_t access_flags_;
+
   // Class flags to help speed up visiting object references.
   uint32_t class_flags_;
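
Note: a worked example of the primitive_type_ packing behind kPrimitiveTypeSizeShiftShift and
SetPrimitiveType() above, assuming a 4-byte int component (ComponentSizeShift == 2):

// Illustration only; the concrete values are assumptions for an int[] component.
uint32_t v32 = static_cast<uint32_t>(Primitive::kPrimInt);         // low 16 bits: the type
v32 |= 2u << Class::kPrimitiveTypeSizeShiftShift;                   // high 16 bits: size shift
Primitive::Type type = static_cast<Primitive::Type>(v32 & Class::kPrimitiveTypeMask);
size_t size_shift = v32 >> Class::kPrimitiveTypeSizeShiftShift;     // == 2
// As described in the comment on kPrimitiveTypeSizeShiftShift:
// array_bytes = header_size + (num_elements << size_shift).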
 
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 0b3461f..a3071b7 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -22,27 +22,33 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "mirror/class.h"
 #include "runtime.h"
 
+#include <atomic>
+
 namespace art {
 namespace mirror {
 
-inline uint32_t DexCache::ClassSize(size_t pointer_size) {
+inline uint32_t DexCache::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength + 5;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
 
-inline String* DexCache::GetResolvedString(uint32_t string_idx) {
-  DCHECK_LT(string_idx, NumStrings());
-  return GetStrings()[string_idx].Read();
+inline mirror::String* DexCache::GetResolvedString(uint32_t string_idx) {
+  DCHECK_LT(string_idx, GetDexFile()->NumStringIds());
+  return StringDexCachePair::LookupString(GetStrings(), string_idx, NumStrings()).Read();
 }
 
-inline void DexCache::SetResolvedString(uint32_t string_idx, String* resolved) {
-  DCHECK_LT(string_idx, NumStrings());
+inline void DexCache::SetResolvedString(uint32_t string_idx, mirror::String* resolved) {
+  DCHECK_LT(string_idx % NumStrings(), NumStrings());
   // TODO default transaction support.
-  GetStrings()[string_idx] = GcRoot<String>(resolved);
+  StringDexCachePair idx_ptr;
+  idx_ptr.string_index = string_idx;
+  idx_ptr.string_pointer = GcRoot<String>(resolved);
+  GetStrings()[string_idx % NumStrings()].store(idx_ptr, std::memory_order_relaxed);
   // TODO: Fine-grained marking, so that we don't need to go through all arrays in full.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
@@ -60,7 +66,7 @@
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
 
-inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, size_t ptr_size) {
+inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   ArtField* field = GetElementPtrSize(GetResolvedFields(), field_idx, ptr_size);
@@ -70,13 +76,13 @@
   return field;
 }
 
-inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, size_t ptr_size) {
+inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   SetElementPtrSize(GetResolvedFields(), field_idx, field, ptr_size);
 }
 
-inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, size_t ptr_size) {
+inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(method_idx, NumResolvedMethods());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   ArtMethod* method = GetElementPtrSize<ArtMethod*>(GetResolvedMethods(), method_idx, ptr_size);
@@ -88,19 +94,20 @@
   return method;
 }
 
-inline void DexCache::SetResolvedMethod(uint32_t method_idx, ArtMethod* method, size_t ptr_size) {
+inline void DexCache::SetResolvedMethod(uint32_t method_idx,
+                                        ArtMethod* method,
+                                        PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
   DCHECK_LT(method_idx, NumResolvedMethods());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   SetElementPtrSize(GetResolvedMethods(), method_idx, method, ptr_size);
 }
 
 template <typename PtrType>
-inline PtrType DexCache::GetElementPtrSize(PtrType* ptr_array, size_t idx, size_t ptr_size) {
-  if (ptr_size == 8u) {
+inline PtrType DexCache::GetElementPtrSize(PtrType* ptr_array, size_t idx, PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
     uint64_t element = reinterpret_cast<const uint64_t*>(ptr_array)[idx];
     return reinterpret_cast<PtrType>(dchecked_integral_cast<uintptr_t>(element));
   } else {
-    DCHECK_EQ(ptr_size, 4u);
     uint32_t element = reinterpret_cast<const uint32_t*>(ptr_array)[idx];
     return reinterpret_cast<PtrType>(dchecked_integral_cast<uintptr_t>(element));
   }
@@ -110,12 +117,11 @@
 inline void DexCache::SetElementPtrSize(PtrType* ptr_array,
                                         size_t idx,
                                         PtrType ptr,
-                                        size_t ptr_size) {
-  if (ptr_size == 8u) {
+                                        PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
     reinterpret_cast<uint64_t*>(ptr_array)[idx] =
         dchecked_integral_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
   } else {
-    DCHECK_EQ(ptr_size, 4u);
     reinterpret_cast<uint32_t*>(ptr_array)[idx] =
         dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr));
   }
@@ -130,9 +136,16 @@
   VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Visit arrays after.
   if (kVisitNativeRoots) {
-    GcRoot<mirror::String>* strings = GetStrings();
+    mirror::StringDexCacheType* strings = GetStrings();
     for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
-      visitor.VisitRootIfNonNull(strings[i].AddressWithoutBarrier());
+      StringDexCachePair source = strings[i].load(std::memory_order_relaxed);
+      mirror::String* before = source.string_pointer.Read<kReadBarrierOption>();
+      GcRoot<mirror::String> root(before);
+      visitor.VisitRootIfNonNull(root.AddressWithoutBarrier());
+      if (root.Read() != before) {
+        source.string_pointer = GcRoot<String>(root.Read());
+        strings[i].store(source, std::memory_order_relaxed);
+      }
     }
     GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
     for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
@@ -142,12 +155,14 @@
 }
 
 template <ReadBarrierOption kReadBarrierOption, typename Visitor>
-inline void DexCache::FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor) {
-  GcRoot<mirror::String>* src = GetStrings();
+inline void DexCache::FixupStrings(mirror::StringDexCacheType* dest, const Visitor& visitor) {
+  mirror::StringDexCacheType* src = GetStrings();
   for (size_t i = 0, count = NumStrings(); i < count; ++i) {
-    mirror::String* source = src[i].Read<kReadBarrierOption>();
-    mirror::String* new_source = visitor(source);
-    dest[i] = GcRoot<mirror::String>(new_source);
+    StringDexCachePair source = src[i].load(std::memory_order_relaxed);
+    mirror::String* ptr = source.string_pointer.Read<kReadBarrierOption>();
+    mirror::String* new_source = visitor(ptr);
+    source.string_pointer = GcRoot<String>(new_source);
+    dest[i].store(source, std::memory_order_relaxed);
   }
 }
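
Note: taken together, the GetResolvedString()/SetResolvedString() pair above implements a
fixed-size, lossy, lock-free cache keyed by string_idx modulo NumStrings(). A sketch of the
expected behavior, assuming NumStrings() == 1024; the indices are made up for illustration:

// Illustration only.
dex_cache->SetResolvedString(5, str_a);      // slot 5 now holds {str_a, 5}
dex_cache->GetResolvedString(5);             // stored index matches -> returns str_a
dex_cache->GetResolvedString(1029);          // maps to slot 5, stored index is 5 -> returns nullptr
dex_cache->SetResolvedString(1029, str_b);   // slot 5 overwritten with {str_b, 1029}
dex_cache->GetResolvedString(5);             // stored index is now 1029 -> returns nullptr (evicted)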
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 692c6cb..cfcec9c 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -33,7 +33,7 @@
 
 void DexCache::Init(const DexFile* dex_file,
                     String* location,
-                    GcRoot<String>* strings,
+                    StringDexCacheType* strings,
                     uint32_t num_strings,
                     GcRoot<Class>* resolved_types,
                     uint32_t num_resolved_types,
@@ -41,7 +41,7 @@
                     uint32_t num_resolved_methods,
                     ArtField** resolved_fields,
                     uint32_t num_resolved_fields,
-                    size_t pointer_size) {
+                    PointerSize pointer_size) {
   CHECK(dex_file != nullptr);
   CHECK(location != nullptr);
   CHECK_EQ(num_strings != 0u, strings != nullptr);
@@ -67,7 +67,7 @@
   }
 }
 
-void DexCache::Fixup(ArtMethod* trampoline, size_t pointer_size) {
+void DexCache::Fixup(ArtMethod* trampoline, PointerSize pointer_size) {
   // Fixup the resolve methods array to contain trampoline for resolution.
   CHECK(trampoline != nullptr);
   CHECK(trampoline->IsRuntimeMethod());
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 7912510..770c45d 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -35,11 +35,61 @@
 
 class String;
 
+struct PACKED(8) StringDexCachePair {
+  GcRoot<String> string_pointer;
+  uint32_t string_index;
+  // The array is initially [ {0,0}, {0,0}, {0,0} ... ]
+  // We maintain the invariant that once a dex cache entry is populated,
+  // the pointer is always non-0
+  // Any given entry would thus be:
+  // {non-0, non-0} OR {0,0}
+  //
+  // It is then generally sufficient to check whether the
+  // lookup string index matches the stored string index (for a string index > 0),
+  // because if it does, the pointer is also non-null.
+  //
+  // For the 0th entry which is a special case, the value is either
+  // {0,0} (initial state) or {non-0, 0} which indicates
+  // that a valid string is stored at that index for a dex string id of 0.
+  //
+  // As an optimization, we want to avoid branching on the string pointer since
+  // it's always non-null if the string id branch succeeds (except for the 0th string id).
+  // Set the initial state for the 0th entry to be {0,1} which is guaranteed to fail
+  // the lookup string id == stored id branch.
+  static void Initialize(StringDexCacheType* strings) {
+    DCHECK(StringDexCacheType().is_lock_free());
+    mirror::StringDexCachePair first_elem;
+    first_elem.string_pointer = GcRoot<String>(nullptr);
+    first_elem.string_index = 1;
+    strings[0].store(first_elem, std::memory_order_relaxed);
+  }
+  static GcRoot<String> LookupString(StringDexCacheType* dex_cache,
+                                     uint32_t string_idx,
+                                     uint32_t cache_size) {
+    StringDexCachePair index_string = dex_cache[string_idx % cache_size]
+        .load(std::memory_order_relaxed);
+    if (string_idx != index_string.string_index) return GcRoot<String>(nullptr);
+    DCHECK(!index_string.string_pointer.IsNull());
+    return index_string.string_pointer;
+  }
+};
+using StringDexCacheType = std::atomic<StringDexCachePair>;
+
+
 // C++ mirror of java.lang.DexCache.
 class MANAGED DexCache FINAL : public Object {
  public:
   // Size of java.lang.DexCache.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
+
+  // Size of string dex cache. Needs to be a power of 2 for entrypoint assumptions to hold.
+  static constexpr size_t kDexCacheStringCacheSize = 1024;
+  static_assert(IsPowerOfTwo(kDexCacheStringCacheSize),
+                "String dex cache size is not a power of 2.");
+
+  static constexpr size_t StaticStringSize() {
+    return kDexCacheStringCacheSize;
+  }
 
   // Size of an instance of java.lang.DexCache not including referenced values.
   static constexpr uint32_t InstanceSize() {
@@ -48,7 +98,7 @@
 
   void Init(const DexFile* dex_file,
             String* location,
-            GcRoot<String>* strings,
+            StringDexCacheType* strings,
             uint32_t num_strings,
             GcRoot<Class>* resolved_types,
             uint32_t num_resolved_types,
@@ -56,13 +106,13 @@
             uint32_t num_resolved_methods,
             ArtField** resolved_fields,
             uint32_t num_resolved_fields,
-            size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+            PointerSize pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Fixup(ArtMethod* trampoline, size_t pointer_size)
+  void Fixup(ArtMethod* trampoline, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
-  void FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor)
+  void FixupStrings(StringDexCacheType* dest, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
@@ -109,35 +159,37 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, num_resolved_methods_);
   }
 
-  String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
+  mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetResolvedString(uint32_t string_idx, String* resolved) ALWAYS_INLINE
+  void SetResolvedString(uint32_t string_idx, mirror::String* resolved) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   Class* GetResolvedType(uint32_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetResolvedType(uint32_t type_idx, Class* resolved) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, size_t ptr_size)
+  ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void SetResolvedMethod(uint32_t method_idx, ArtMethod* resolved, size_t ptr_size)
+  ALWAYS_INLINE void SetResolvedMethod(uint32_t method_idx,
+                                       ArtMethod* resolved,
+                                       PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Pointer sized variant, used for patching.
-  ALWAYS_INLINE ArtField* GetResolvedField(uint32_t idx, size_t ptr_size)
+  ALWAYS_INLINE ArtField* GetResolvedField(uint32_t idx, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Pointer sized variant, used for patching.
-  ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, size_t ptr_size)
+  ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, PointerSize ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  GcRoot<String>* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldPtr<GcRoot<String>*>(StringsOffset());
+  StringDexCacheType* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetFieldPtr64<StringDexCacheType*>(StringsOffset());
   }
 
-  void SetStrings(GcRoot<String>* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetStrings(StringDexCacheType* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldPtr<false>(StringsOffset(), strings);
   }
 
@@ -202,10 +254,10 @@
   // so they need to be public.
 
   template <typename PtrType>
-  static PtrType GetElementPtrSize(PtrType* ptr_array, size_t idx, size_t ptr_size);
+  static PtrType GetElementPtrSize(PtrType* ptr_array, size_t idx, PointerSize ptr_size);
 
   template <typename PtrType>
-  static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, size_t ptr_size);
+  static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, PointerSize ptr_size);
 
  private:
   // Visit instance fields of the dex cache as well as its associated arrays.
@@ -222,7 +274,8 @@
   uint64_t resolved_fields_;    // ArtField*, array with num_resolved_fields_ elements.
   uint64_t resolved_methods_;   // ArtMethod*, array with num_resolved_methods_ elements.
   uint64_t resolved_types_;     // GcRoot<Class>*, array with num_resolved_types_ elements.
-  uint64_t strings_;            // GcRoot<String>*, array with num_strings_ elements.
+  uint64_t strings_;            // std::atomic<StringDexCachePair>*,
+                                // array with num_strings_ elements.
   uint32_t num_resolved_fields_;    // Number of elements in the resolved_fields_ array.
   uint32_t num_resolved_methods_;   // Number of elements in the resolved_methods_ array.
   uint32_t num_resolved_types_;     // Number of elements in the resolved_types_ array.
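Editor's note: the dex_cache.h hunks above replace the flat GcRoot<String> array with a fixed-size array of atomic pairs, and the power-of-two constraint on kDexCacheStringCacheSize is presumably there so a dex string index can be folded into a cache slot with a single mask, which is what the "entrypoint assumptions" comment refers to. A minimal, self-contained sketch of that lookup pattern follows; the slot layout and names are simplified stand-ins, not ART's actual StringDexCacheType/GcRoot machinery.

    // Illustration only: a direct-mapped, power-of-two string cache.
    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct CachedString {
      uint32_t string_idx;    // Dex string index currently occupying this slot.
      uint32_t resolved_ref;  // Stand-in for a compressed GcRoot<mirror::String>.
    };

    static constexpr size_t kCacheSize = 1024;
    static_assert((kCacheSize & (kCacheSize - 1)) == 0, "Cache size must be a power of two");

    inline uint32_t LookUpString(std::atomic<CachedString>* cache, uint32_t string_idx) {
      // Because kCacheSize is a power of two, masking is equivalent to '% kCacheSize'
      // and can be emitted as a single AND instruction in an assembly fast path.
      CachedString slot = cache[string_idx & (kCacheSize - 1)].load(std::memory_order_relaxed);
      // A mismatch means some other string index hashed to this slot; resolve via the slow path.
      return (slot.string_idx == string_idx) ? slot.resolved_ref : 0u;
    }

This is also consistent with the dex_cache_test.cc expectation below, where NumStrings() is allowed to match either the dex file's string count or the fixed cache size.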
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 48f2ca5..175997c 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -22,6 +22,7 @@
 #include "common_runtime_test.h"
 #include "linear_alloc.h"
 #include "mirror/class_loader-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -40,7 +41,8 @@
                                                 Runtime::Current()->GetLinearAlloc())));
   ASSERT_TRUE(dex_cache.Get() != nullptr);
 
-  EXPECT_EQ(java_lang_dex_file_->NumStringIds(), dex_cache->NumStrings());
+  EXPECT_TRUE(dex_cache->StaticStringSize() == dex_cache->NumStrings()
+      || java_lang_dex_file_->NumStringIds() == dex_cache->NumStrings());
   EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 8a0daec..8b0f8ce 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -27,9 +27,8 @@
 
 namespace mirror {
 
-template <bool kTransactionActive>
-inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field,
-                                                bool force_resolve) {
+template <PointerSize kPointerSize, bool kTransactionActive>
+inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field, bool force_resolve) {
   StackHandleScope<2> hs(self);
   // Try to resolve type before allocating since this is a thread suspension point.
   Handle<mirror::Class> type = hs.NewHandle(field->GetType<true>());
@@ -54,10 +53,8 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  const auto pointer_size = kTransactionActive ?
-      Runtime::Current()->GetClassLinker()->GetImagePointerSize() : sizeof(void*);
   auto dex_field_index = field->GetDexFieldIndex();
-  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, pointer_size);
+  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, kPointerSize);
   if (field->GetDeclaringClass()->IsProxyClass()) {
     DCHECK(field->IsStatic());
     DCHECK_LT(dex_field_index, 2U);
@@ -70,7 +67,7 @@
     } else {
       // We rely on the field being resolved so that we can get back to the ArtField
       // (i.e. FromReflectedMethod).
-      field->GetDexCache()->SetResolvedField(dex_field_index, field, pointer_size);
+      field->GetDexCache()->SetResolvedField(dex_field_index, field, kPointerSize);
     }
   }
   ret->SetType<kTransactionActive>(type.Get());
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index ff6847c..65f6b16 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -68,7 +68,7 @@
     }
   }
   mirror::DexCache* const dex_cache = declaring_class->GetDexCache();
-  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), sizeof(void*));
+  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
   CHECK(art_field != nullptr);
   CHECK_EQ(declaring_class, art_field->GetDeclaringClass());
   return art_field;
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index edaddbd..93fd7f1 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_FIELD_H_
 
 #include "accessible_object.h"
+#include "base/enums.h"
 #include "gc_root.h"
 #include "object.h"
 #include "object_callbacks.h"
@@ -92,7 +93,7 @@
   // Slow, try to use only for PrettyField and such.
   ArtField* GetArtField() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive = false>
   static mirror::Field* CreateFromArtField(Thread* self, ArtField* field,
                                            bool force_resolve)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 9838b71..ef16719 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -51,18 +51,25 @@
   array_class_ = GcRoot<Class>(nullptr);
 }
 
-template <bool kTransactionActive>
+template <PointerSize kPointerSize, bool kTransactionActive>
 Method* Method::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(!method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Method*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
-template Method* Method::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
-template Method* Method::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k32, false>(Thread* self,
+                                                                      ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k32, true>(Thread* self,
+                                                                     ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k64, false>(Thread* self,
+                                                                      ArtMethod* method);
+template Method* Method::CreateFromArtMethod<PointerSize::k64, true>(Thread* self,
+                                                                     ArtMethod* method);
 
 void Method::VisitRoots(RootVisitor* visitor) {
   static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
@@ -96,18 +103,25 @@
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-template <bool kTransactionActive>
+template <PointerSize kPointerSize, bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
+    static_cast<AbstractMethod*>(ret)->
+        CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
   return ret;
 }
 
-template Constructor* Constructor::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
-template Constructor* Constructor::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k32, false>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k32, true>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k64, false>(
+    Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<PointerSize::k64, true>(
+    Thread* self, ArtMethod* method);
 
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/method.h b/runtime/mirror/method.h
index 0b56964..be51784 100644
--- a/runtime/mirror/method.h
+++ b/runtime/mirror/method.h
@@ -28,7 +28,7 @@
 // C++ mirror of java.lang.reflect.Method.
 class MANAGED Method : public AbstractMethod {
  public:
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Method* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -60,7 +60,7 @@
 // C++ mirror of java.lang.reflect.Constructor.
 class MANAGED Constructor: public AbstractMethod {
  public:
-  template <bool kTransactionActive = false>
+  template <PointerSize kPointerSize, bool kTransactionActive>
   static Constructor* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 76a36ac..0495c95 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -40,7 +40,7 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Object::ClassSize(size_t pointer_size) {
+inline uint32_t Object::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = kVTableLength;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
@@ -106,7 +106,11 @@
 }
 
 inline mirror::Object* Object::MonitorEnter(Thread* self) {
-  return Monitor::MonitorEnter(self, this);
+  return Monitor::MonitorEnter(self, this, /*trylock*/false);
+}
+
+inline mirror::Object* Object::MonitorTryEnter(Thread* self) {
+  return Monitor::MonitorEnter(self, this, /*trylock*/true);
 }
 
 inline bool Object::MonitorExit(Thread* self) {
@@ -143,10 +147,20 @@
 #endif
 }
 
+inline uint32_t Object::GetMarkBit() {
+#ifdef USE_READ_BARRIER
+  return GetLockWord(false).MarkBitState();
+#else
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+#endif
+}
+
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord lw = GetLockWord(false);
   lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
   SetLockWord(lw, false);
@@ -169,6 +183,8 @@
   DCHECK(kUseBakerReadBarrier);
   DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
   DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
   LockWord expected_lw;
   LockWord new_lw;
   do {
@@ -212,6 +228,24 @@
 #endif
 }
 
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    new_lw = lw;
+    new_lw.SetMarkBitState(mark_bit);
+    // Since this is only set from the mutator, we can use the non-release CAS.
+  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+  return true;
+}
+
+
 inline void Object::AssertReadBarrierPointer() const {
   if (kUseBakerReadBarrier) {
     Object* obj = const_cast<Object*>(this);
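Editor's note: AtomicSetMarkBit above is the standard compare-and-swap retry loop: sample the lock word, give up if another thread already changed the bit, otherwise try to install the updated word and loop on CAS failure. A self-contained sketch of the same idiom on a plain std::atomic<uint32_t>; the bit position is an arbitrary assumption for this sketch, not ART's LockWord layout.

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kMarkBit = 1u << 31;  // Assumed position, for illustration only.

    // Returns true if the flag was moved from 'expected' to 'desired', false if the
    // precondition no longer held (i.e. we lost the race to another thread).
    inline bool AtomicSetFlag(std::atomic<uint32_t>& word, bool expected, bool desired) {
      uint32_t old_value = word.load(std::memory_order_relaxed);
      while (true) {
        if (((old_value & kMarkBit) != 0) != expected) {
          return false;  // Lost the race.
        }
        const uint32_t new_value = desired ? (old_value | kMarkBit) : (old_value & ~kMarkBit);
        // Relaxed suffices when, as in the mutator-only case above, no other data is published.
        if (word.compare_exchange_weak(old_value, new_value, std::memory_order_relaxed)) {
          return true;
        }
        // compare_exchange_weak refreshed old_value; re-check the precondition and retry.
      }
    }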
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 701c600..13c536e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -163,8 +163,7 @@
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
         // loop iteration.
-        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
-                                                    lw.ReadBarrierState());
+        LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
         if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 0ee46c3..5b129bf 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_OBJECT_H_
 
 #include "base/casts.h"
+#include "base/enums.h"
 #include "globals.h"
 #include "object_reference.h"
 #include "offsets.h"
@@ -74,7 +75,7 @@
   static constexpr size_t kVTableLength = 11;
 
   // The size of the java.lang.Class representing a java.lang.Object.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.Object.
   static constexpr uint32_t InstanceSize() {
@@ -92,6 +93,7 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // TODO: Clean this up and change to return int32_t
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
 
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -102,6 +104,12 @@
   template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
@@ -140,6 +148,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
+  // Try to enter the monitor; returns non-null if we succeeded.
+  mirror::Object* MonitorTryEnter(Thread* self)
+      EXCLUSIVE_LOCK_FUNCTION()
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MonitorEnter(Thread* self)
       EXCLUSIVE_LOCK_FUNCTION()
       REQUIRES(!Roles::uninterruptible_)
@@ -468,7 +481,7 @@
   void SetFieldPtr(MemberOffset field_offset, T new_value)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-        field_offset, new_value, sizeof(void*));
+        field_offset, new_value, kRuntimePointerSize);
   }
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
@@ -480,11 +493,11 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
-  ALWAYS_INLINE void SetFieldPtrWithSize(MemberOffset field_offset, T new_value,
-                                         size_t pointer_size)
+  ALWAYS_INLINE void SetFieldPtrWithSize(MemberOffset field_offset,
+                                         T new_value,
+                                         PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       intptr_t ptr  = reinterpret_cast<intptr_t>(new_value);
       DCHECK_EQ(static_cast<int32_t>(ptr), ptr);  // Check that we don't lose any non-zero bits.
       SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
@@ -516,19 +529,19 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   T GetFieldPtr(MemberOffset field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, sizeof(void*));
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, kRuntimePointerSize);
   }
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   T GetFieldPtr64(MemberOffset field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, 8u);
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset,
+                                                             PointerSize::k64);
   }
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, size_t pointer_size)
+  ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, PointerSize pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
+    if (pointer_size == PointerSize::k32) {
       return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
     } else {
       int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
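Editor's note: most of the mechanical churn in this change replaces raw size_t pointer-size parameters, and the sizeof(void*) literals at call sites, with the strongly typed PointerSize from base/enums.h, so an arbitrary integer no longer type-checks and guards like the removed DCHECK(pointer_size == 4 || pointer_size == 8) become unnecessary. A sketch of what such an enum and the kRuntimePointerSize constant look like; treat the exact spelling as an assumption, the authoritative definition lives in base/enums.h.

    #include <cstddef>

    // Sketch of a strongly typed pointer-size enum (see base/enums.h for the real one).
    enum class PointerSize : size_t {
      k32 = 4,
      k64 = 8,
    };

    // Pointer size of the runtime that is currently executing (host or target).
    static constexpr PointerSize kRuntimePointerSize =
        sizeof(void*) == 8u ? PointerSize::k64 : PointerSize::k32;

    // Call sites that really need the byte count convert back explicitly.
    static constexpr size_t PointerSizeInBytes(PointerSize size) {
      return static_cast<size_t>(size);
    }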
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 4257396..a99d616 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -26,7 +26,7 @@
 class MANAGED ObjectArray: public Array {
  public:
   // The size of Object[].class.
-  static uint32_t ClassSize(size_t pointer_size) {
+  static uint32_t ClassSize(PointerSize pointer_size) {
     return Array::ClassSize(pointer_size);
   }
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index c1284a6..0034220 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -24,6 +24,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "asm_support.h"
+#include "base/enums.h"
 #include "class-inl.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -78,9 +79,11 @@
   EXPECT_EQ(kObjectReferenceSize, sizeof(HeapReference<Object>));
   EXPECT_EQ(kObjectHeaderSize, sizeof(Object));
   EXPECT_EQ(ART_METHOD_QUICK_CODE_OFFSET_32,
-            ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value());
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k32).
+                Int32Value());
   EXPECT_EQ(ART_METHOD_QUICK_CODE_OFFSET_64,
-            ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value());
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k64).
+                Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
@@ -306,7 +309,7 @@
   // pretend we are trying to call 'new char[3]' from String.toCharArray
   ScopedObjectAccess soa(Thread::Current());
   Class* java_util_Arrays = class_linker_->FindSystemClass(soa.Self(), "Ljava/util/Arrays;");
-  ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", sizeof(void*));
+  ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", kRuntimePointerSize);
   const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId("[I");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
@@ -363,7 +366,7 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<ClassLoader*>(class_loader)));
   Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", loader);
-  ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
+  ArtMethod* clinit = klass->FindClassInitializer(kRuntimePointerSize);
   const DexFile::TypeId* klass_type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(klass_type_id != nullptr);
 
@@ -499,22 +502,22 @@
   Class* klass2 = linker->FindClass(soa.Self(), "LProtoCompare2;", class_loader_2);
   ASSERT_TRUE(klass2 != nullptr);
 
-  ArtMethod* m1_1 = klass1->GetVirtualMethod(0, sizeof(void*));
+  ArtMethod* m1_1 = klass1->GetVirtualMethod(0, kRuntimePointerSize);
   EXPECT_STREQ(m1_1->GetName(), "m1");
-  ArtMethod* m2_1 = klass1->GetVirtualMethod(1, sizeof(void*));
+  ArtMethod* m2_1 = klass1->GetVirtualMethod(1, kRuntimePointerSize);
   EXPECT_STREQ(m2_1->GetName(), "m2");
-  ArtMethod* m3_1 = klass1->GetVirtualMethod(2, sizeof(void*));
+  ArtMethod* m3_1 = klass1->GetVirtualMethod(2, kRuntimePointerSize);
   EXPECT_STREQ(m3_1->GetName(), "m3");
-  ArtMethod* m4_1 = klass1->GetVirtualMethod(3, sizeof(void*));
+  ArtMethod* m4_1 = klass1->GetVirtualMethod(3, kRuntimePointerSize);
   EXPECT_STREQ(m4_1->GetName(), "m4");
 
-  ArtMethod* m1_2 = klass2->GetVirtualMethod(0, sizeof(void*));
+  ArtMethod* m1_2 = klass2->GetVirtualMethod(0, kRuntimePointerSize);
   EXPECT_STREQ(m1_2->GetName(), "m1");
-  ArtMethod* m2_2 = klass2->GetVirtualMethod(1, sizeof(void*));
+  ArtMethod* m2_2 = klass2->GetVirtualMethod(1, kRuntimePointerSize);
   EXPECT_STREQ(m2_2->GetName(), "m2");
-  ArtMethod* m3_2 = klass2->GetVirtualMethod(2, sizeof(void*));
+  ArtMethod* m3_2 = klass2->GetVirtualMethod(2, kRuntimePointerSize);
   EXPECT_STREQ(m3_2->GetName(), "m3");
-  ArtMethod* m4_2 = klass2->GetVirtualMethod(3, sizeof(void*));
+  ArtMethod* m4_2 = klass2->GetVirtualMethod(3, kRuntimePointerSize);
   EXPECT_STREQ(m4_2->GetName(), "m4");
 }
 
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 12bfe38..039989b 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -22,7 +22,7 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t Reference::ClassSize(size_t pointer_size) {
+inline uint32_t Reference::ClassSize(PointerSize pointer_size) {
   uint32_t vtable_entries = Object::kVTableLength + 4;
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 3baa12e..38c6616 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
+#include "base/enums.h"
 #include "class.h"
 #include "gc_root.h"
 #include "object.h"
@@ -43,7 +44,7 @@
 class MANAGED Reference : public Object {
  public:
   // Size of java.lang.ref.Reference.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.ref.Reference.
   static constexpr uint32_t InstanceSize() {
@@ -76,8 +77,9 @@
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObject<Reference>(PendingNextOffset());
+    return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset());
   }
 
   void SetPendingNext(Reference* pending_next)
@@ -102,7 +104,7 @@
   // removed from the list after having determined the reference is not ready
   // to be enqueued on a java ReferenceQueue.
   bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetPendingNext() == nullptr;
+    return GetPendingNext<kWithoutReadBarrier>() == nullptr;
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
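Editor's note: GetPendingNext is now templated on ReadBarrierOption with kWithReadBarrier as the default, so existing callers stay unchanged while IsUnprocessed opts out: a pure null check never reads through the reference, and null has no to-space copy, so the barrier can be skipped. A self-contained sketch of that opt-out pattern, with the barrier itself elided since it is GC-specific.

    #include <atomic>

    enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

    struct Node {
      std::atomic<Node*> next{nullptr};

      // Defaulting the option keeps ordinary call sites spelled exactly as before.
      template <ReadBarrierOption kOption = kWithReadBarrier>
      Node* GetNext() {
        Node* ref = next.load(std::memory_order_relaxed);
        if (kOption == kWithReadBarrier) {
          // A concurrent-copying collector would fix 'ref' up to its to-space copy here.
        }
        return ref;
      }

      bool IsUnprocessed() {
        // Comparing against null does not dereference the reference, so no barrier is needed.
        return GetNext<kWithoutReadBarrier>() == nullptr;
      }
    };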
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 6285542..d3660e5 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -20,6 +20,7 @@
 #include "array.h"
 #include "base/bit_utils.h"
 #include "class.h"
+#include "common_throws.h"
 #include "gc/heap-inl.h"
 #include "globals.h"
 #include "intern_table.h"
@@ -32,8 +33,8 @@
 namespace art {
 namespace mirror {
 
-inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 56;
+inline uint32_t String::ClassSize(PointerSize pointer_size) {
+  uint32_t vtable_entries = Object::kVTableLength + 57;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
@@ -134,9 +135,7 @@
 inline uint16_t String::CharAt(int32_t index) {
   int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
   if (UNLIKELY((index < 0) || (index >= count))) {
-    Thread* self = Thread::Current();
-    self->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
-                             "length=%i; index=%i", count, index);
+    ThrowStringIndexOutOfBoundsException(index, count);
     return 0;
   }
   return GetValue()[index];
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index e2cfb8d..d492ba3 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -35,7 +35,7 @@
 class MANAGED String FINAL : public Object {
  public:
   // Size of java.lang.String.class.
-  static uint32_t ClassSize(size_t pointer_size);
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of java.lang.String not including its value array.
   static constexpr uint32_t InstanceSize() {
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index f068b3e..0bccc8b 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -17,6 +17,7 @@
 #include "throwable.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -106,7 +107,7 @@
     if (depth == 0) {
       result += "(Throwable with empty stack trace)";
     } else {
-      const size_t ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      const PointerSize ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       for (int32_t i = 0; i < depth; ++i) {
         ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, ptr_size);
         uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 0f56705..e863ea9 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -155,7 +155,7 @@
       return false;
     }
   }
-  LockWord fat(this, lw.ReadBarrierState());
+  LockWord fat(this, lw.GCState());
   // Publish the updated lock word, which may race with other threads.
   bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
   // Lock profiling.
@@ -220,7 +220,7 @@
 struct NthCallerWithDexPcVisitor FINAL : public StackVisitor {
   explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         method_(nullptr),
         dex_pc_(0),
         current_frame_number_(0),
@@ -300,7 +300,7 @@
                                           uint32_t owners_dex_pc,
                                           size_t num_waiters) {
   const char* owners_filename;
-  int32_t owners_line_number;
+  int32_t owners_line_number = 0;
   if (owners_method != nullptr) {
     TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
   }
@@ -314,21 +314,34 @@
   return oss.str();
 }
 
+bool Monitor::TryLockLocked(Thread* self) {
+  if (owner_ == nullptr) {  // Unowned.
+    owner_ = self;
+    CHECK_EQ(lock_count_, 0);
+    // When debugging, save the current monitor holder for future
+    // acquisition failures to use in sampled logging.
+    if (lock_profiling_threshold_ != 0) {
+      locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+    }
+  } else if (owner_ == self) {  // Recursive.
+    lock_count_++;
+  } else {
+    return false;
+  }
+  AtraceMonitorLock(self, GetObject(), false /* is_wait */);
+  return true;
+}
+
+bool Monitor::TryLock(Thread* self) {
+  MutexLock mu(self, monitor_lock_);
+  return TryLockLocked(self);
+}
+
 void Monitor::Lock(Thread* self) {
   MutexLock mu(self, monitor_lock_);
   while (true) {
-    if (owner_ == nullptr) {  // Unowned.
-      owner_ = self;
-      CHECK_EQ(lock_count_, 0);
-      // When debugging, save the current monitor holder for future
-      // acquisition failures to use in sampled logging.
-      if (lock_profiling_threshold_ != 0) {
-        locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
-      }
-      break;
-    } else if (owner_ == self) {  // Recursive.
-      lock_count_++;
-      break;
+    if (TryLockLocked(self)) {
+      return;
     }
     // Contended.
     const bool log_contention = (lock_profiling_threshold_ != 0);
@@ -430,8 +443,6 @@
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
-
-  AtraceMonitorLock(self, GetObject(), false /* is_wait */);
 }
 
 static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...)
@@ -763,20 +774,21 @@
         return false;
       }
       // Deflate to a thin lock.
-      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
-                                                 lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+                                                 monitor->lock_count_,
+                                                 lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
           << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
-      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
     } else {
       // No lock and no hash, just put an empty lock word inside the object.
-      LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+      LockWord new_lw = LockWord::FromDefault(lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated" << obj << " to empty lock word";
@@ -852,7 +864,7 @@
   return obj;
 }
 
-mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
+mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj, bool trylock) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
   self->AssertThreadSuspensionIsAllowable();
@@ -865,7 +877,7 @@
     LockWord lock_word = h_obj->GetLockWord(true);
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
-        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
         if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
           AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
           // CasLockWord enforces more than the acquire ordering we need here.
@@ -879,8 +891,9 @@
           // We own the lock, increase the recursion count.
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
-            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
-                                                          lock_word.ReadBarrierState()));
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id,
+                                                          new_count,
+                                                          lock_word.GCState()));
             if (!kUseReadBarrier) {
               h_obj->SetLockWord(thin_locked, true);
               AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
@@ -898,6 +911,9 @@
             InflateThinLocked(self, h_obj, lock_word, 0);
           }
         } else {
+          if (trylock) {
+            return nullptr;
+          }
           // Contention.
           contention_count++;
           Runtime* runtime = Runtime::Current();
@@ -916,8 +932,12 @@
       }
       case LockWord::kFatLocked: {
         Monitor* mon = lock_word.FatLockMonitor();
-        mon->Lock(self);
-        return h_obj.Get();  // Success!
+        if (trylock) {
+          return mon->TryLock(self) ? h_obj.Get() : nullptr;
+        } else {
+          mon->Lock(self);
+          return h_obj.Get();  // Success!
+        }
       }
       case LockWord::kHashCode:
         // Inflate with the existing hashcode.
@@ -957,9 +977,9 @@
           LockWord new_lw = LockWord::Default();
           if (lock_word.ThinLockCount() != 0) {
             uint32_t new_count = lock_word.ThinLockCount() - 1;
-            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
           } else {
-            new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+            new_lw = LockWord::FromDefault(lock_word.GCState());
           }
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
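Editor's note: MonitorEnter now carries a trylock flag: on a contended thin lock, or a fat monitor owned by another thread, the trylock path returns nullptr instead of blocking, with Monitor::TryLock/TryLockLocked covering the fat-lock case. A hedged usage sketch of the non-blocking entry point added to mirror::Object; monitor_test.cc further below exercises the same path through the ObjectTryLock RAII wrapper. The helper name here is hypothetical and the usual object/thread headers are assumed.

    // Hypothetical caller, for illustration only.
    bool TryWithMonitor(Thread* self, mirror::Object* obj)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      if (obj->MonitorTryEnter(self) == nullptr) {
        return false;  // Monitor is held elsewhere; back off instead of blocking.
      }
      // ... critical section ...
      obj->MonitorExit(self);
      return true;
    }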
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 7b4b8f9..1d829e1 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -62,7 +62,7 @@
       NO_THREAD_SAFETY_ANALYSIS;  // TODO: Reading lock owner without holding lock is racy.
 
   // NO_THREAD_SAFETY_ANALYSIS for mon->Lock.
-  static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj)
+  static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj, bool trylock)
       EXCLUSIVE_LOCK_FUNCTION(obj)
       NO_THREAD_SAFETY_ANALYSIS
       REQUIRES(!Roles::uninterruptible_)
@@ -193,6 +193,15 @@
                !monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Try to lock without blocking; returns true if we acquired the lock.
+  bool TryLock(Thread* self)
+      REQUIRES(!monitor_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  // Variant for already holding the monitor lock.
+  bool TryLockLocked(Thread* self)
+      REQUIRES(monitor_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void Lock(Thread* self)
       REQUIRES(!monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index 83e0c0d..48d256c 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -26,6 +26,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"  // Strings are easiest to allocate
+#include "object_lock.h"
 #include "scoped_thread_state_change.h"
 #include "thread_pool.h"
 
@@ -374,4 +375,60 @@
                   "Monitor test thread pool 3");
 }
 
+class TryLockTask : public Task {
+ public:
+  explicit TryLockTask(Handle<mirror::Object> obj) : obj_(obj) {}
+
+  void Run(Thread* self) {
+    ScopedObjectAccess soa(self);
+    // Lock is held by other thread, try lock should fail.
+    ObjectTryLock<mirror::Object> lock(self, obj_);
+    EXPECT_FALSE(lock.Acquired());
+  }
+
+  void Finalize() {
+    delete this;
+  }
+
+ private:
+  Handle<mirror::Object> obj_;
+};
+
+// Test trylock in deadlock scenarios.
+TEST_F(MonitorTest, TestTryLock) {
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  Thread* const self = Thread::Current();
+  ThreadPool thread_pool("the pool", 2);
+  ScopedObjectAccess soa(self);
+  StackHandleScope<3> hs(self);
+  Handle<mirror::Object> obj1(
+      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
+  Handle<mirror::Object> obj2(
+      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
+  {
+    ObjectLock<mirror::Object> lock1(self, obj1);
+    ObjectLock<mirror::Object> lock2(self, obj1);
+    {
+      ObjectTryLock<mirror::Object> trylock(self, obj1);
+      EXPECT_TRUE(trylock.Acquired());
+    }
+    // Test failure case.
+    thread_pool.AddTask(self, new TryLockTask(obj1));
+    thread_pool.StartWorkers(self);
+    ScopedThreadSuspension sts(self, kSuspended);
+    thread_pool.Wait(Thread::Current(), /*do_work*/false, /*may_hold_locks*/false);
+  }
+  // Test that the trylock actually locks the object.
+  {
+    ObjectTryLock<mirror::Object> trylock(self, obj1);
+    EXPECT_TRUE(trylock.Acquired());
+    obj1->Notify(self);
+    // Since we hold the lock, there should be no monitor state exception.
+    self->AssertNoPendingException();
+  }
+  thread_pool.StopWorkers(self);
+}
+
+
 }  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4bb83b6..b2349fc 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -33,7 +33,6 @@
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -379,13 +378,13 @@
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
 
-  OatFileAssistant oat_file_assistant(filename, target_instruction_set, profile_changed, false);
+  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false);
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
     return OatFileAssistant::kNoDexOptNeeded;
   }
-  return oat_file_assistant.GetDexOptNeeded(filter);
+  return oat_file_assistant.GetDexOptNeeded(filter, profile_changed);
 }
 
 static jstring DexFile_getDexFileStatus(JNIEnv* env,
@@ -412,7 +411,6 @@
   }
 
   OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
-                                      false /* profile_changed */,
                                       false /* load_executable */);
 
   std::ostringstream status;
@@ -487,7 +485,7 @@
     return JNI_FALSE;
   }
 
-  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false);
   return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
 }
 
@@ -579,7 +577,6 @@
 
   OatFileAssistant oat_file_assistant(filename.c_str(),
                                       target_instruction_set,
-                                      false /* profile_changed */,
                                       false /* load_executable */);
 
   std::unique_ptr<OatFile> best_oat_file = oat_file_assistant.GetBestOatFile();
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 759d6fa..45e49e2 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -16,7 +16,7 @@
 
 #include "dalvik_system_VMRuntime.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 extern "C" void android_set_application_target_sdk_version(uint32_t version);
 #endif
 #include <limits.h>
@@ -29,6 +29,7 @@
 
 #include "art_method-inl.h"
 #include "arch/instruction_set.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "debugger.h"
@@ -200,7 +201,7 @@
   // Note that targetSdkVersion may be 0, meaning "current".
   Runtime::Current()->SetTargetSdkVersion(target_sdk_version);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // This part is letting libc/dynamic linker know about current app's
   // target sdk version to enable compatibility workarounds.
   android_set_application_target_sdk_version(static_cast<uint32_t>(target_sdk_version));
@@ -329,7 +330,7 @@
 static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uint32_t field_idx,
                                          bool is_static)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtField* field = dex_cache->GetResolvedField(field_idx, sizeof(void*));
+  ArtField* field = dex_cache->GetResolvedField(field_idx, kRuntimePointerSize);
   if (field != nullptr) {
     return;
   }
@@ -342,7 +343,7 @@
     return;
   }
   if (is_static) {
-    field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     field = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
@@ -350,14 +351,14 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches resolved field " << PrettyField(field);
-  dex_cache->SetResolvedField(field_idx, field, sizeof(void*));
+  dex_cache->SetResolvedField(field_idx, field, kRuntimePointerSize);
 }
 
 // Based on ClassLinker::ResolveMethod.
 static void PreloadDexCachesResolveMethod(Handle<mirror::DexCache> dex_cache, uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtMethod* method = dex_cache->GetResolvedMethod(method_idx, sizeof(void*));
+  ArtMethod* method = dex_cache->GetResolvedMethod(method_idx, kRuntimePointerSize);
   if (method != nullptr) {
     return;
   }
@@ -370,14 +371,14 @@
   switch (invoke_type) {
     case kDirect:
     case kStatic:
-      method = klass->FindDirectMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindDirectMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     case kInterface:
-      method = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     case kSuper:
     case kVirtual:
-      method = klass->FindVirtualMethod(dex_cache.Get(), method_idx, sizeof(void*));
+      method = klass->FindVirtualMethod(dex_cache.Get(), method_idx, kRuntimePointerSize);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
@@ -387,7 +388,7 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches resolved method " << PrettyMethod(method);
-  dex_cache->SetResolvedMethod(method_idx, method, sizeof(void*));
+  dex_cache->SetResolvedMethod(method_idx, method, kRuntimePointerSize);
 }
 
 struct DexCacheStats {
@@ -462,7 +463,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedMethods(); j++) {
-      ArtMethod* method = dex_cache->GetResolvedMethod(j, sizeof(void*));
+      ArtMethod* method = dex_cache->GetResolvedMethod(j, kRuntimePointerSize);
       if (method != nullptr) {
         filled->num_methods++;
       }
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 9da44a4..fe3cbe7 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,8 +18,6 @@
 
 #include <stdlib.h>
 
-#include <cutils/process_name.h>
-
 #include "arch/instruction_set.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
@@ -178,12 +176,17 @@
     // Only restart if it was streaming mode.
     // TODO: Expose buffer size, so we can also do file mode.
     if (output_mode == Trace::TraceOutputMode::kStreaming) {
-      const char* proc_name_cutils = get_process_name();
+      static constexpr size_t kMaxProcessNameLength = 100;
+      char name_buf[kMaxProcessNameLength] = {};
+      int rc = pthread_getname_np(pthread_self(), name_buf, kMaxProcessNameLength);
       std::string proc_name;
-      if (proc_name_cutils != nullptr) {
-        proc_name = proc_name_cutils;
+
+      if (rc == 0) {
+          // On success use the pthread name.
+          proc_name = name_buf;
       }
-      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+
+      if (proc_name.empty() || proc_name == "zygote" || proc_name == "zygote64") {
         // Either no process name, or the name hasn't been changed, yet. Just use pid.
         pid_t pid = getpid();
         proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 0624da3..6d5e7c7 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -19,6 +19,7 @@
 #include <iostream>
 
 #include "art_field-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -136,7 +137,9 @@
   }
   for (ArtField& field : ifields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<kRuntimePointerSize>(self,
+                                                                                   &field,
+                                                                                   force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -149,7 +152,9 @@
   }
   for (ArtField& field : sfields) {
     if (!public_only || field.IsPublic()) {
-      auto* reflect_field = mirror::Field::CreateFromArtField(self, &field, force_resolve);
+      auto* reflect_field = mirror::Field::CreateFromArtField<kRuntimePointerSize>(self,
+                                                                                   &field,
+                                                                                   force_resolve);
       if (reflect_field == nullptr) {
         if (kIsDebugBuild) {
           self->AssertPendingException();
@@ -222,11 +227,11 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* art_field = FindFieldByName(self, name, c->GetIFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<kRuntimePointerSize>(self, art_field, true);
   }
   art_field = FindFieldByName(self, name, c->GetSFieldsPtr());
   if (art_field != nullptr) {
-    return mirror::Field::CreateFromArtField(self, art_field, true);
+    return mirror::Field::CreateFromArtField<kRuntimePointerSize>(self, art_field, true);
   }
   return nullptr;
 }
@@ -323,7 +328,10 @@
 static jobject Class_getDeclaredConstructorInternal(
     JNIEnv* env, jobject javaThis, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal(
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal<kRuntimePointerSize,
+                                                                              false>(
       soa.Self(),
       DecodeClass(soa, javaThis),
       soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
@@ -343,7 +351,7 @@
   Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t constructor_count = 0;
   // Two pass approach for speed.
-  for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
+  for (auto& m : h_klass->GetDirectMethods(kRuntimePointerSize)) {
     constructor_count += MethodMatchesConstructor(&m, publicOnly != JNI_FALSE) ? 1u : 0u;
   }
   auto h_constructors = hs.NewHandle(mirror::ObjectArray<mirror::Constructor>::Alloc(
@@ -353,9 +361,12 @@
     return nullptr;
   }
   constructor_count = 0;
-  for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) {
+  for (auto& m : h_klass->GetDirectMethods(kRuntimePointerSize)) {
     if (MethodMatchesConstructor(&m, publicOnly != JNI_FALSE)) {
-      auto* constructor = mirror::Constructor::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* constructor = mirror::Constructor::CreateFromArtMethod<kRuntimePointerSize, false>(
+          soa.Self(), &m);
       if (UNLIKELY(constructor == nullptr)) {
         soa.Self()->AssertPendingOOMException();
         return nullptr;
@@ -369,7 +380,9 @@
 static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis,
                                                jobject name, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal(
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal<kRuntimePointerSize, false>(
       soa.Self(),
       DecodeClass(soa, javaThis),
       soa.Decode<mirror::String*>(name),
@@ -383,7 +396,7 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t num_methods = 0;
-  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(kRuntimePointerSize)) {
     auto modifiers = m.GetAccessFlags();
     // Add non-constructor declared methods.
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
@@ -394,11 +407,14 @@
   auto ret = hs.NewHandle(mirror::ObjectArray<mirror::Method>::Alloc(
       soa.Self(), mirror::Method::ArrayClass(), num_methods));
   num_methods = 0;
-  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(kRuntimePointerSize)) {
     auto modifiers = m.GetAccessFlags();
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
+      DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+      DCHECK(!Runtime::Current()->IsActiveTransaction());
+      auto* method =
+          mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), &m);
       if (method == nullptr) {
         soa.Self()->AssertPendingException();
         return nullptr;
@@ -610,7 +626,7 @@
   auto* constructor = klass->GetDeclaredConstructor(
       soa.Self(),
       ScopedNullHandle<mirror::ObjectArray<mirror::Class>>(),
-      sizeof(void*));
+      kRuntimePointerSize);
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 994ccb1..f0140a3 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -59,7 +59,7 @@
 static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings());
+  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
   return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
 }
 
@@ -75,7 +75,7 @@
                                        jobject string) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings());
+  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
   dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String*>(string));
 }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 54b8afd..dd46233 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Constructor.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "jni_internal.h"
@@ -65,7 +66,7 @@
 static jobjectArray Constructor_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod)
-      ->GetInterfaceMethodIfProxy(sizeof(void*));
+      ->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   mirror::ObjectArray<mirror::Class>* result_array =
       method->GetDexFile()->GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 78999c2..c3f2a27 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Method.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "jni_internal.h"
@@ -57,7 +58,7 @@
     mirror::Class* klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
-    for (const auto& m : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
+    for (const auto& m : klass->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
       if (&m == method) {
         throws_index = i;
         break;
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 61a1085..155c008 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -21,6 +21,7 @@
 #include "nativebridge/native_bridge.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "dex_file-inl.h"
@@ -45,7 +46,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t native_method_count = 0;
-  for (auto& m : c->GetMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     native_method_count += m.IsNative() ? 1u : 0u;
   }
   return native_method_count;
@@ -60,7 +61,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t count = 0;
-  for (auto& m : c->GetMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     if (m.IsNative()) {
       if (count < method_count) {
         methods[count].name = m.GetName();
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
new file mode 100644
index 0000000..c20c8b8
--- /dev/null
+++ b/runtime/native_stack_dump.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "native_stack_dump.h"
+
+#include <ostream>
+
+#include <stdio.h>
+
+#include "art_method.h"
+
+// For DumpNativeStack.
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
+
+#if defined(__linux__)
+
+#include <memory>
+#include <vector>
+
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "arch/instruction_set.h"
+#include "base/memory_tool.h"
+#include "base/mutex.h"
+#include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
+#include "oat_quick_method_header.h"
+#include "os.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+#endif
+
+namespace art {
+
+#if defined(__linux__)
+
+static constexpr bool kUseAddr2line = !kIsTargetBuild;
+
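+// Writes the given prefix (if any) followed by indentation. The 'odd' flag alternates the
+// indentation by one space so that successive addr2line output lines pair up visually.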
+ALWAYS_INLINE
+static inline void WritePrefix(std::ostream& os, const char* prefix, bool odd) {
+  if (prefix != nullptr) {
+    os << prefix;
+  }
+  os << "  ";
+  if (!odd) {
+    os << " ";
+  }
+}
+
+// The state of an open pipe to addr2line. In "server" mode, addr2line takes input on stdin
+// and prints the result to stdout. This struct keeps the state of the open connection.
+struct Addr2linePipe {
+  Addr2linePipe(int in_fd, int out_fd, const std::string& file_name, pid_t pid)
+      : in(in_fd, false), out(out_fd, false), file(file_name), child_pid(pid), odd(true) {}
+
+  ~Addr2linePipe() {
+    kill(child_pid, SIGKILL);
+  }
+
+  File in;      // The file descriptor that is connected to the output of addr2line.
+  File out;     // The file descriptor that is connected to the input of addr2line.
+
+  const std::string file;     // The file addr2line is working on, so that we know when to close
+                              // and restart.
+  const pid_t child_pid;      // The pid of the child, which we should kill when we're done.
+  bool odd;                   // Print state for indentation of lines.
+};
+
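+// Forks an addr2line child running 'args', with its stdin and stdout connected to the caller
+// through a pair of pipes. Returns null if either pipe or the fork could not be set up.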
+static std::unique_ptr<Addr2linePipe> Connect(const std::string& name, const char* args[]) {
+  int caller_to_addr2line[2];
+  int addr2line_to_caller[2];
+
+  if (pipe(caller_to_addr2line) == -1) {
+    return nullptr;
+  }
+  if (pipe(addr2line_to_caller) == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    return nullptr;
+  }
+
+  pid_t pid = fork();
+  if (pid == -1) {
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+    return nullptr;
+  }
+
+  if (pid == 0) {
+    dup2(caller_to_addr2line[0], STDIN_FILENO);
+    dup2(addr2line_to_caller[1], STDOUT_FILENO);
+
+    close(caller_to_addr2line[0]);
+    close(caller_to_addr2line[1]);
+    close(addr2line_to_caller[0]);
+    close(addr2line_to_caller[1]);
+
+    execv(args[0], const_cast<char* const*>(args));
+    exit(1);
+  } else {
+    close(caller_to_addr2line[0]);
+    close(addr2line_to_caller[1]);
+    return std::unique_ptr<Addr2linePipe>(new Addr2linePipe(addr2line_to_caller[0],
+                                                            caller_to_addr2line[1],
+                                                            name,
+                                                            pid));
+  }
+}
+
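+// Reads whatever output addr2line has produced so far and writes it to 'os', prefixing each
+// line via WritePrefix. 'expected' is the number of lines still expected; while it is non-zero
+// a longer select() timeout is used. On a read or select error the pipe is reset (closed).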
+static void Drain(size_t expected,
+                  const char* prefix,
+                  std::unique_ptr<Addr2linePipe>* pipe /* inout */,
+                  std::ostream& os) {
+  DCHECK(pipe != nullptr);
+  DCHECK(pipe->get() != nullptr);
+  int in = pipe->get()->in.Fd();
+  DCHECK_GE(in, 0);
+
+  bool prefix_written = false;
+
+  for (;;) {
+    constexpr uint32_t kWaitTimeExpectedMicros = 500 * 1000;
+    constexpr uint32_t kWaitTimeUnexpectedMicros = 50 * 1000;
+
+    struct timeval tv;
+    tv.tv_sec = 0;
+    tv.tv_usec = expected > 0 ? kWaitTimeExpectedMicros : kWaitTimeUnexpectedMicros;
+
+    fd_set rfds;
+    FD_ZERO(&rfds);
+    FD_SET(in, &rfds);
+
+    int retval = TEMP_FAILURE_RETRY(select(in + 1, &rfds, nullptr, nullptr, &tv));
+
+    if (retval < 0) {
+      // Other side may have crashed or other errors.
+      pipe->reset();
+      return;
+    }
+
+    if (retval == 0) {
+      // Timeout.
+      return;
+    }
+
+    DCHECK_EQ(retval, 1);
+
+    constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
+                                        // alt stack, but just to be sure...
+    char buffer[kMaxBuffer];
+    memset(buffer, 0, kMaxBuffer);
+    int bytes_read = TEMP_FAILURE_RETRY(read(in, buffer, kMaxBuffer - 1));
+
+    if (bytes_read < 0) {
+      // This should not really happen...
+      pipe->reset();
+      return;
+    }
+
+    char* tmp = buffer;
+    while (*tmp != 0) {
+      if (!prefix_written) {
+        WritePrefix(os, prefix, (*pipe)->odd);
+        prefix_written = true;
+      }
+      char* new_line = strchr(tmp, '\n');
+      if (new_line == nullptr) {
+        os << tmp;
+
+        break;
+      } else {
+        char saved = *(new_line + 1);
+        *(new_line + 1) = 0;
+        os << tmp;
+        *(new_line + 1) = saved;
+
+        tmp = new_line + 1;
+        prefix_written = false;
+        (*pipe)->odd = !(*pipe)->odd;
+
+        if (expected > 0) {
+          expected--;
+        }
+      }
+    }
+  }
+}
+
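+// Symbolizes 'offset' within the mapped file 'map_src' by writing the address to the addr2line
+// pipe and draining the reply into 'os'. The pipe is recreated whenever the mapped file
+// changes; [vdso] is skipped because it is not backed by a file addr2line can read.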
+static void Addr2line(const std::string& map_src,
+                      uintptr_t offset,
+                      std::ostream& os,
+                      const char* prefix,
+                      std::unique_ptr<Addr2linePipe>* pipe /* inout */) {
+  DCHECK(pipe != nullptr);
+
+  if (map_src == "[vdso]") {
+    // Special-case the vdso; it is not backed by a file on disk, so addr2line cannot read it.
+    return;
+  }
+
+  if (*pipe == nullptr || (*pipe)->file != map_src) {
+    if (*pipe != nullptr) {
+      Drain(0, prefix, pipe, os);
+    }
+    pipe->reset();  // Close early.
+
+    const char* args[7] = {
+        "/usr/bin/addr2line",
+        "--functions",
+        "--inlines",
+        "--demangle",
+        "-e",
+        map_src.c_str(),
+        nullptr
+    };
+    *pipe = Connect(map_src, args);
+  }
+
+  Addr2linePipe* pipe_ptr = pipe->get();
+  if (pipe_ptr == nullptr) {
+    // Failed...
+    return;
+  }
+
+  // Send the offset.
+  const std::string hex_offset = StringPrintf("%zx\n", offset);
+
+  if (!pipe_ptr->out.WriteFully(hex_offset.data(), hex_offset.length())) {
+    // Error. :-(
+    pipe->reset();
+    return;
+  }
+
+  // Now drain the reply. addr2line prints the function name on one line and the file:line on
+  // the next, so we expect two lines per address.
+  Drain(2U, prefix, pipe, os);
+}
+
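+// Runs 'cmd' via popen(3) and reports whether the stream could be opened. Used only to probe
+// whether addr2line is available.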
+static bool RunCommand(std::string cmd) {
+  FILE* stream = popen(cmd.c_str(), "r");
+  if (stream) {
+    pclose(stream);
+    return true;
+  } else {
+    return false;
+  }
+}
+
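+// Returns true if 'pc' lies within the quick-compiled code of 'method', using the code size
+// recorded in the OatQuickMethodHeader that precedes the method's entry point.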
+static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
+
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* existing_map,
+                     const char* prefix,
+                     ArtMethod* current_method,
+                     void* ucontext_ptr) {
+  // b/18119146
+  if (RUNNING_ON_MEMORY_TOOL != 0) {
+    return;
+  }
+
+  BacktraceMap* map = existing_map;
+  std::unique_ptr<BacktraceMap> tmp_map;
+  if (map == nullptr) {
+    tmp_map.reset(BacktraceMap::Create(getpid()));
+    map = tmp_map.get();
+  }
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
+  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
+    os << prefix << "(backtrace::Unwind failed for thread " << tid
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
+    return;
+  } else if (backtrace->NumFrames() == 0) {
+    os << prefix << "(no native stack frames for thread " << tid << ")\n";
+    return;
+  }
+
+  // Check whether we have and should use addr2line.
+  bool use_addr2line;
+  if (kUseAddr2line) {
+    // Try to run it to see whether we have it. Pass an argument so that it doesn't assume
+    // a.out and doesn't just print to stderr.
+    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h");
+  } else {
+    use_addr2line = false;
+  }
+
+  std::unique_ptr<Addr2linePipe> addr2line_state;
+
+  for (Backtrace::const_iterator it = backtrace->begin();
+       it != backtrace->end(); ++it) {
+    // We produce output like this:
+    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
+    // In order for parsing tools to continue to function, the stack dump
+    // format must at least adhere to this format:
+    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
+    // The parsers require a single space before and after pc, and two spaces
+    // after the <RELATIVE_ADDR>. There can be any prefix data before the
+    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
+    os << prefix << StringPrintf("#%02zu pc ", it->num);
+    bool try_addr2line = false;
+    if (!BacktraceMap::IsValid(it->map)) {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
+                                                            : "%08" PRIxPTR "  ???",
+                         it->pc);
+    } else {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
+                                                            : "%08" PRIxPTR "  ",
+                         BacktraceMap::GetRelativePc(it->map, it->pc));
+      os << it->map.name;
+      os << " (";
+      if (!it->func_name.empty()) {
+        os << it->func_name;
+        if (it->func_offset != 0) {
+          os << "+" << it->func_offset;
+        }
+        try_addr2line = true;
+      } else if (current_method != nullptr &&
+          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
+          PcIsWithinQuickCode(current_method, it->pc)) {
+        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
+        os << JniLongName(current_method) << "+"
+           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
+      } else {
+        os << "???";
+      }
+      os << ")";
+    }
+    os << "\n";
+    if (try_addr2line && use_addr2line) {
+      Addr2line(it->map.name, it->pc - it->map.start, os, prefix, &addr2line_state);
+    }
+  }
+
+  if (addr2line_state != nullptr) {
+    Drain(0, prefix, &addr2line_state, os);
+  }
+}
+
+void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
+  if (tid == GetTid()) {
+    // There's no point showing that we're reading our stack out of /proc!
+    return;
+  }
+
+  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
+  std::string kernel_stack;
+  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
+    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
+    return;
+  }
+
+  std::vector<std::string> kernel_stack_frames;
+  Split(kernel_stack, '\n', &kernel_stack_frames);
+  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
+  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
+  kernel_stack_frames.pop_back();
+  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
+    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
+    // into "futex_wait_queue_me+0xcd/0x110".
+    const char* text = kernel_stack_frames[i].c_str();
+    const char* close_bracket = strchr(text, ']');
+    if (close_bracket != nullptr) {
+      text = close_bracket + 2;
+    }
+    os << prefix;
+    if (include_count) {
+      os << StringPrintf("#%02zd ", i);
+    }
+    os << text << "\n";
+  }
+}
+
+#elif defined(__APPLE__)
+
+void DumpNativeStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     BacktraceMap* existing_map ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     ArtMethod* current_method ATTRIBUTE_UNUSED,
+                     void* ucontext_ptr ATTRIBUTE_UNUSED) {
+}
+
+void DumpKernelStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     bool include_count ATTRIBUTE_UNUSED) {
+}
+
+#else
+#error "Unsupported architecture for native stack dumps."
+#endif
+
+}  // namespace art
diff --git a/runtime/native_stack_dump.h b/runtime/native_stack_dump.h
new file mode 100644
index 0000000..d64bc82
--- /dev/null
+++ b/runtime/native_stack_dump.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_STACK_DUMP_H_
+#define ART_RUNTIME_NATIVE_STACK_DUMP_H_
+
+#include <unistd.h>
+
+#include <iosfwd>
+
+#include "base/macros.h"
+
+class BacktraceMap;
+
+namespace art {
+
+class ArtMethod;
+
+// Dumps the native stack for thread 'tid' to 'os'.
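+// If 'map' is null, a map of the current process is created on demand. 'current_method' may be
+// supplied to help name frames that fall inside its compiled code, and 'ucontext' (e.g. from a
+// signal handler) is used as the starting point for the unwind when provided.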
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* map = nullptr,
+                     const char* prefix = "",
+                     ArtMethod* current_method = nullptr,
+                     void* ucontext = nullptr)
+    NO_THREAD_SAFETY_ANALYSIS;
+
+// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
+void DumpKernelStack(std::ostream& os,
+                     pid_t tid,
+                     const char* prefix = "",
+                     bool include_count = true);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_STACK_DUMP_H_
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 2295cb4..e9b0d3c 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -46,6 +46,7 @@
       DCHECK(caller == nullptr);
       if (count == n) {
         caller = m;
+        caller_pc = GetCurrentQuickFramePc();
         return false;
       }
       count++;
@@ -57,6 +58,7 @@
   const bool include_runtime_and_upcalls_;
   size_t count;
   ArtMethod* caller;
+  uintptr_t caller_pc;
 };
 
 }  // namespace art
diff --git a/runtime/oat.h b/runtime/oat.h
index 57675dc..7c84fe9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 72c0cea..5752fd9 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -28,12 +28,13 @@
 #include <sstream>
 
 // dlopen_ext support from bionic.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "android/dlext.h"
 #endif
 
 #include "art_method-inl.h"
 #include "base/bit_vector.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
@@ -260,7 +261,7 @@
     return false;
   }
 
-  size_t pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
+  PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
   uint8_t* dex_cache_arrays = bss_begin_;
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
@@ -646,7 +647,7 @@
       *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
       return false;
     }
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     android_dlextinfo extinfo;
     extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
                                                                 //   (open oat files multiple
@@ -671,7 +672,7 @@
         return false;
       }
     }
-#endif
+#endif  // ART_TARGET_ANDROID
   }
   if (dlopen_handle_ == nullptr) {
     *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
@@ -1072,7 +1073,7 @@
 
 const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location,
                                                   const uint32_t* dex_location_checksum,
-                                                  bool warn_if_not_found) const {
+                                                  std::string* error_msg) const {
   // NOTE: We assume here that the canonical location for a given dex_location never
   // changes. If it does (i.e. some symlink used by the filename changes) we may return
   // an incorrect OatDexFile. As long as we have a checksum to check, we shall return
@@ -1114,32 +1115,29 @@
       secondary_oat_dex_files_.PutBefore(secondary_lb, key_copy, oat_dex_file);
     }
   }
-  if (oat_dex_file != nullptr &&
-      (dex_location_checksum == nullptr ||
-       oat_dex_file->GetDexFileLocationChecksum() == *dex_location_checksum)) {
-    return oat_dex_file;
+
+  if (oat_dex_file == nullptr) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      *error_msg = "Failed to find OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation();
+    }
+    return nullptr;
   }
 
-  if (warn_if_not_found) {
-    std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
-    std::string checksum("<unspecified>");
-    if (dex_location_checksum != nullptr) {
-      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+  if (dex_location_checksum != nullptr &&
+      oat_dex_file->GetDexFileLocationChecksum() != *dex_location_checksum) {
+    if (error_msg != nullptr) {
+      std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location);
+      std::string checksum = StringPrintf("0x%08x", oat_dex_file->GetDexFileLocationChecksum());
+      std::string required_checksum = StringPrintf("0x%08x", *dex_location_checksum);
+      *error_msg = "OatDexFile for DexFile " + std::string(dex_location)
+          + " (canonical path " + dex_canonical_location + ") in OatFile " + GetLocation()
+          + " has checksum " + checksum + " but " + required_checksum + " was required";
     }
-    LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " ( canonical path " << dex_canonical_location << ")"
-                 << " with checksum " << checksum << " in OatFile " << GetLocation();
-    if (kIsDebugBuild) {
-      for (const OatDexFile* odf : oat_dex_files_storage_) {
-        LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << odf->GetDexFileLocation()
-                     << " (canonical path " << odf->GetCanonicalDexFileLocation() << ")"
-                     << " with checksum 0x" << std::hex << odf->GetDexFileLocationChecksum();
-      }
-    }
+    return nullptr;
   }
-
-  return nullptr;
+  return oat_dex_file;
 }
 
 OatFile::OatDexFile::OatDexFile(const OatFile* oat_file,
@@ -1167,12 +1165,15 @@
 
 std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+  static constexpr bool kVerify = false;
+  static constexpr bool kVerifyChecksum = false;
   return DexFile::Open(dex_file_pointer_,
                        FileSize(),
                        dex_file_location_,
                        dex_file_location_checksum_,
                        this,
-                       false /* verify */,
+                       kVerify,
+                       kVerifyChecksum,
                        error_msg);
 }
 
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index aa727ff..f5ab9dc 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -213,9 +213,15 @@
 
     friend class art::OatDexFile;
   };
+
+  // Get the OatDexFile for the given dex_location within this oat file.
+  // If dex_location_checksum is non-null, the OatDexFile will only be
+  // returned if it has a matching checksum.
+  // If error_msg is non-null and no OatDexFile is returned, error_msg will
+  // be updated with a description of why no OatDexFile was returned.
   const OatDexFile* GetOatDexFile(const char* dex_location,
                                   const uint32_t* const dex_location_checksum,
-                                  bool exception_if_not_found = true) const
+                                  /*out*/std::string* error_msg = nullptr) const
       REQUIRES(!secondary_lookup_lock_);
 
   const std::vector<const OatDexFile*>& GetOatDexFiles() const {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index c998e20..fe6332d 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -16,18 +16,7 @@
 
 #include "oat_file_assistant.h"
 
-#include <fcntl.h>
-#ifdef __linux__
-#include <sys/sendfile.h>
-#else
-#include <sys/socket.h>
-#endif
-#include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
-
-#include <set>
-
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "compiler_filter.h"
@@ -39,7 +28,6 @@
 #include "os.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
-#include "ScopedFd.h"
 #include "utils.h"
 
 namespace art {
@@ -64,17 +52,15 @@
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, profile_changed, load_executable)
+    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
 { }
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const char* oat_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : isa_(isa), profile_changed_(profile_changed), load_executable_(load_executable) {
+    : isa_(isa), load_executable_(load_executable), odex_(this), oat_(this) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -84,12 +70,26 @@
     load_executable_ = false;
   }
 
-  // If the user gave a target oat location, save that as the cached oat
-  // location now so we won't try to construct the default location later.
+  // Get the odex filename.
+  std::string error_msg;
+  std::string odex_file_name;
+  if (DexLocationToOdexFilename(dex_location_, isa_, &odex_file_name, &error_msg)) {
+    odex_.Reset(odex_file_name);
+  } else {
+    LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
+  }
+
+  // Get the oat filename.
   if (oat_location != nullptr) {
-    cached_oat_file_name_ = std::string(oat_location);
-    cached_oat_file_name_attempted_ = true;
-    cached_oat_file_name_found_ = true;
+    oat_.Reset(oat_location);
+  } else {
+    std::string oat_file_name;
+    if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
+      oat_.Reset(oat_file_name);
+    } else {
+      LOG(WARNING) << "Failed to determine oat file name for dex location "
+        << dex_location_ << ": " << error_msg;
+    }
   }
 }
 
@@ -121,11 +121,12 @@
   CHECK(error_msg != nullptr);
   CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired";
 
-  if (OatFileName() == nullptr) {
+  const std::string* oat_file_name = oat_.Filename();
+  if (oat_file_name == nullptr) {
     *error_msg = "Failed to determine lock file";
     return false;
   }
-  std::string lock_file_name = *OatFileName() + ".flock";
+  std::string lock_file_name = *oat_file_name + ".flock";
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
     unlink(lock_file_name.c_str());
@@ -134,50 +135,33 @@
   return true;
 }
 
-bool OatFileAssistant::OatFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
-  const OatFile* oat_file = GetOatFile();
-  if (oat_file != nullptr) {
-    CompilerFilter::Filter current = oat_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
-  }
-  return false;
-}
-
-bool OatFileAssistant::OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
-  const OatFile* odex_file = GetOdexFile();
-  if (odex_file != nullptr) {
-    CompilerFilter::Filter current = odex_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
-  }
-  return false;
-}
-
-OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target) {
+OatFileAssistant::DexOptNeeded
+OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target, bool profile_changed) {
   bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
   // See if the oat file is in good shape as is.
-  bool oat_okay = OatFileCompilerFilterIsOkay(target);
+  bool oat_okay = oat_.CompilerFilterIsOkay(target, profile_changed);
   if (oat_okay) {
     if (compilation_desired) {
-      if (OatFileIsUpToDate()) {
+      if (oat_.IsUpToDate()) {
         return kNoDexOptNeeded;
       }
     } else {
-      if (!OatFileIsOutOfDate()) {
+      if (!oat_.IsOutOfDate()) {
         return kNoDexOptNeeded;
       }
     }
   }
 
   // See if the odex file is in good shape as is.
-  bool odex_okay = OdexFileCompilerFilterIsOkay(target);
+  bool odex_okay = odex_.CompilerFilterIsOkay(target, profile_changed);
   if (odex_okay) {
     if (compilation_desired) {
-      if (OdexFileIsUpToDate()) {
+      if (odex_.IsUpToDate()) {
         return kNoDexOptNeeded;
       }
     } else {
-      if (!OdexFileIsOutOfDate()) {
+      if (!odex_.IsOutOfDate()) {
         return kNoDexOptNeeded;
       }
     }
@@ -185,11 +169,11 @@
 
   // See if we can get an up-to-date file by running patchoat.
   if (compilation_desired) {
-    if (odex_okay && OdexFileNeedsRelocation() && OdexFileHasPatchInfo()) {
+    if (odex_okay && odex_.NeedsRelocation() && odex_.HasPatchInfo()) {
       return kPatchOatNeeded;
     }
 
-    if (oat_okay && OatFileNeedsRelocation() && OatFileHasPatchInfo()) {
+    if (oat_okay && oat_.NeedsRelocation() && oat_.HasPatchInfo()) {
       return kSelfPatchOatNeeded;
     }
   }
@@ -198,17 +182,44 @@
   return HasOriginalDexFiles() ? kDex2OatNeeded : kNoDexOptNeeded;
 }
 
+// Figure out the currently specified compile filter option in the runtime.
+// Returns true on success, false if the compiler filter is invalid, in which
+// case error_msg describes the problem.
+static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter,
+                                           std::string* error_msg) {
+  CHECK(filter != nullptr);
+  CHECK(error_msg != nullptr);
+
+  *filter = CompilerFilter::kDefaultCompilerFilter;
+  for (StringPiece option : Runtime::Current()->GetCompilerOptions()) {
+    if (option.starts_with("--compiler-filter=")) {
+      const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
+      if (!CompilerFilter::ParseCompilerFilter(compiler_filter_string, filter)) {
+        *error_msg = std::string("Unknown --compiler-filter value: ")
+                   + std::string(compiler_filter_string);
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 bool OatFileAssistant::IsUpToDate() {
   return OatFileIsUpToDate() || OdexFileIsUpToDate();
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::MakeUpToDate(CompilerFilter::Filter target, std::string* error_msg) {
-  switch (GetDexOptNeeded(target)) {
+OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) {
+  CompilerFilter::Filter target;
+  if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
+    return kUpdateNotAttempted;
+  }
+
+  switch (GetDexOptNeeded(target, profile_changed)) {
     case kNoDexOptNeeded: return kUpdateSucceeded;
-    case kDex2OatNeeded: return GenerateOatFile(target, error_msg);
-    case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
-    case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg);
+    case kDex2OatNeeded: return GenerateOatFile(error_msg);
+    case kPatchOatNeeded: return RelocateOatFile(odex_.Filename(), error_msg);
+    case kSelfPatchOatNeeded: return RelocateOatFile(oat_.Filename(), error_msg);
   }
   UNREACHABLE();
 }
@@ -219,46 +230,40 @@
   // 2. Not out-of-date files that are already opened non-executable.
   // 3. Not out-of-date files that we must reopen non-executable.
 
-  if (OatFileIsUpToDate()) {
-    oat_file_released_ = true;
-    return std::move(cached_oat_file_);
+  if (oat_.IsUpToDate()) {
+    return oat_.ReleaseFile();
   }
 
-  if (OdexFileIsUpToDate()) {
-    oat_file_released_ = true;
-    return std::move(cached_odex_file_);
+  if (odex_.IsUpToDate()) {
+    return odex_.ReleaseFile();
   }
 
   VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
     << " attempting to fall back to interpreting oat file instead.";
 
-  if (!OatFileIsOutOfDate() && !OatFileIsExecutable()) {
-    oat_file_released_ = true;
-    return std::move(cached_oat_file_);
+  if (!oat_.IsOutOfDate() && !oat_.IsExecutable()) {
+    return oat_.ReleaseFile();
   }
 
-  if (!OdexFileIsOutOfDate() && !OdexFileIsExecutable()) {
-    oat_file_released_ = true;
-    return std::move(cached_odex_file_);
+  if (!odex_.IsOutOfDate() && !odex_.IsExecutable()) {
+    return odex_.ReleaseFile();
   }
 
-  if (!OatFileIsOutOfDate()) {
+  if (!oat_.IsOutOfDate()) {
     load_executable_ = false;
-    ClearOatFileCache();
-    if (!OatFileIsOutOfDate()) {
-      CHECK(!OatFileIsExecutable());
-      oat_file_released_ = true;
-      return std::move(cached_oat_file_);
+    oat_.Reset();
+    if (!oat_.IsOutOfDate()) {
+      CHECK(!oat_.IsExecutable());
+      return oat_.ReleaseFile();
     }
   }
 
-  if (!OdexFileIsOutOfDate()) {
+  if (!odex_.IsOutOfDate()) {
     load_executable_ = false;
-    ClearOdexFileCache();
-    if (!OdexFileIsOutOfDate()) {
-      CHECK(!OdexFileIsExecutable());
-      oat_file_released_ = true;
-      return std::move(cached_odex_file_);
+    odex_.Reset();
+    if (!odex_.IsOutOfDate()) {
+      CHECK(!odex_.IsExecutable());
+      return odex_.ReleaseFile();
     }
   }
 
@@ -272,10 +277,9 @@
   // Load the primary dex file.
   std::string error_msg;
   const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(
-      dex_location, nullptr, false);
+      dex_location, nullptr, &error_msg);
   if (oat_dex_file == nullptr) {
-    LOG(WARNING) << "Attempt to load out-of-date oat file "
-      << oat_file.GetLocation() << " for dex location " << dex_location;
+    LOG(WARNING) << error_msg;
     return std::vector<std::unique_ptr<const DexFile>>();
   }
 
@@ -289,7 +293,7 @@
   // Load secondary multidex files
   for (size_t i = 1; ; i++) {
     std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location);
-    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+    oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (oat_dex_file == nullptr) {
       // There are no more secondary dex files to load.
       break;
@@ -314,72 +318,34 @@
 }
 
 const std::string* OatFileAssistant::OdexFileName() {
-  if (!cached_odex_file_name_attempted_) {
-    cached_odex_file_name_attempted_ = true;
-
-    std::string error_msg;
-    cached_odex_file_name_found_ = DexFilenameToOdexFilename(
-        dex_location_, isa_, &cached_odex_file_name_, &error_msg);
-    if (!cached_odex_file_name_found_) {
-      // If we can't figure out the odex file, we treat it as if the odex
-      // file was inaccessible.
-      LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
-    }
-  }
-  return cached_odex_file_name_found_ ? &cached_odex_file_name_ : nullptr;
+  return odex_.Filename();
 }
 
 bool OatFileAssistant::OdexFileExists() {
-  return GetOdexFile() != nullptr;
+  return odex_.Exists();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
-  if (OdexFileIsOutOfDate()) {
-    return kOatOutOfDate;
-  }
-  if (OdexFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return odex_.Status();
 }
 
 bool OatFileAssistant::OdexFileIsOutOfDate() {
-  if (!odex_file_is_out_of_date_attempted_) {
-    odex_file_is_out_of_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_out_of_date_ = true;
-    } else {
-      cached_odex_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_out_of_date_;
+  return odex_.IsOutOfDate();
 }
 
 bool OatFileAssistant::OdexFileNeedsRelocation() {
-  return OdexFileStatus() == kOatNeedsRelocation;
+  return odex_.NeedsRelocation();
 }
 
 bool OatFileAssistant::OdexFileIsUpToDate() {
-  if (!odex_file_is_up_to_date_attempted_) {
-    odex_file_is_up_to_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_up_to_date_ = false;
-    } else {
-      cached_odex_file_is_up_to_date_ = GivenOatFileIsUpToDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_up_to_date_;
+  return odex_.IsUpToDate();
 }
 
 CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
-  const OatFile* odex_file = GetOdexFile();
-  CHECK(odex_file != nullptr);
-
-  return odex_file->GetCompilerFilter();
+  return odex_.CompilerFilter();
 }
-std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
+
+static std::string ArtFileName(const OatFile* oat_file) {
   const std::string oat_file_location = oat_file->GetLocation();
   // Replace extension with .art
   const size_t last_ext = oat_file_location.find_last_of('.');
@@ -391,115 +357,56 @@
 }
 
 const std::string* OatFileAssistant::OatFileName() {
-  if (!cached_oat_file_name_attempted_) {
-    cached_oat_file_name_attempted_ = true;
-
-    // Compute the oat file name from the dex location.
-    // TODO: The oat file assistant should be the definitive place for
-    // determining the oat file name from the dex location, not
-    // GetDalvikCacheFilename.
-    std::string cache_dir = StringPrintf("%s%s",
-        DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
-    std::string error_msg;
-    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_.c_str(),
-        cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
-    if (!cached_oat_file_name_found_) {
-      // If we can't determine the oat file name, we treat the oat file as
-      // inaccessible.
-      LOG(WARNING) << "Failed to determine oat file name for dex location "
-        << dex_location_ << ": " << error_msg;
-    }
-  }
-  return cached_oat_file_name_found_ ? &cached_oat_file_name_ : nullptr;
+  return oat_.Filename();
 }
 
 bool OatFileAssistant::OatFileExists() {
-  return GetOatFile() != nullptr;
+  return oat_.Exists();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
-  if (OatFileIsOutOfDate()) {
-    return kOatOutOfDate;
-  }
-  if (OatFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return oat_.Status();
 }
 
 bool OatFileAssistant::OatFileIsOutOfDate() {
-  if (!oat_file_is_out_of_date_attempted_) {
-    oat_file_is_out_of_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_out_of_date_ = true;
-    } else {
-      cached_oat_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_out_of_date_;
+  return oat_.IsOutOfDate();
 }
 
 bool OatFileAssistant::OatFileNeedsRelocation() {
-  return OatFileStatus() == kOatNeedsRelocation;
+  return oat_.NeedsRelocation();
 }
 
 bool OatFileAssistant::OatFileIsUpToDate() {
-  if (!oat_file_is_up_to_date_attempted_) {
-    oat_file_is_up_to_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_up_to_date_ = false;
-    } else {
-      cached_oat_file_is_up_to_date_ = GivenOatFileIsUpToDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_up_to_date_;
+  return oat_.IsUpToDate();
 }
 
 CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
-  const OatFile* oat_file = GetOatFile();
-  CHECK(oat_file != nullptr);
-
-  return oat_file->GetCompilerFilter();
+  return oat_.CompilerFilter();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
-  // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
-  // is more work than we need to do. If performance becomes a concern, and
-  // this method is actually called, this should be fixed.
-  if (GivenOatFileIsOutOfDate(file)) {
-    return kOatOutOfDate;
-  }
-  if (GivenOatFileIsUpToDate(file)) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
+  std::string error_msg;
   const uint32_t* dex_checksum_pointer = GetRequiredDexChecksum();
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
-      dex_location_.c_str(), dex_checksum_pointer, false);
+      dex_location_.c_str(), dex_checksum_pointer, &error_msg);
   if (oat_dex_file == nullptr) {
-    return true;
+    VLOG(oat) << error_msg;
+    return kOatOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
   for (size_t i = 1; ; i++) {
-    std::string secondary_dex_location
-      = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
+    std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
     const OatFile::OatDexFile* secondary_oat_dex_file
-      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
+      = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr);
     if (secondary_oat_dex_file == nullptr) {
       // There are no more secondary dex files to check.
       break;
     }
 
-    std::string error_msg;
     uint32_t expected_secondary_checksum = 0;
     if (DexFile::GetChecksum(secondary_dex_location.c_str(),
           &expected_secondary_checksum, &error_msg)) {
@@ -510,7 +417,7 @@
           << secondary_dex_location
           << ". Expected: " << expected_secondary_checksum
           << ", Actual: " << actual_secondary_checksum;
-        return true;
+        return kOatOutOfDate;
       }
     } else {
       // If we can't get the checksum for the secondary location, we assume
@@ -521,7 +428,6 @@
   }
 
   CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
-  VLOG(oat) << "Compiler filter for " << file.GetLocation() << " is " << current_compiler_filter;
 
   // Verify the image checksum
   if (CompilerFilter::DependsOnImageChecksum(current_compiler_filter)) {
@@ -530,7 +436,7 @@
       VLOG(oat) << "No image for oat image checksum to match against.";
 
       if (HasOriginalDexFiles()) {
-        return true;
+        return kOatOutOfDate;
       }
 
       // If there is no original dex file to fall back to, grudgingly accept
@@ -544,45 +450,18 @@
     } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
         != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
-      return true;
+      return kOatOutOfDate;
     }
   } else {
     VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
   }
 
-  // Verify the profile hasn't changed recently.
-  // TODO: Move this check to OatFileCompilerFilterIsOkay? Nothing bad should
-  // happen if we use an oat file compiled with an out-of-date profile.
-  if (CompilerFilter::DependsOnProfile(current_compiler_filter)) {
-    if (profile_changed_) {
-      VLOG(oat) << "The profile has changed recently.";
-      return true;
-    }
-  } else {
-    VLOG(oat) << "Profile check skipped for compiler filter " << current_compiler_filter;
-  }
-
-  // Everything looks good; the dex file is not out of date.
-  return false;
-}
-
-bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
-  return GivenOatFileStatus(file) == kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
-  if (GivenOatFileIsOutOfDate(file)) {
-    return false;
-  }
-
-  CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
-
   if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
         VLOG(oat) << "No image to check oat relocation against.";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_data_begin recorded for the image in the oat file matches
@@ -594,7 +473,7 @@
           ": Oat file image oat_data_begin (" << oat_data_begin << ")"
           << " does not match actual image oat_data_begin ("
           << image_info->oat_data_begin << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_patch_delta recorded for the image in the oat file matches
@@ -605,7 +484,7 @@
           ": Oat file image patch delta (" << oat_patch_delta << ")"
           << " does not match actual image patch delta ("
           << image_info->patch_delta << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
     } else {
       // Oat files compiled in PIC mode do not require relocation.
@@ -614,7 +493,7 @@
   } else {
     VLOG(oat) << "Oat relocation test skipped for compiler filter " << current_compiler_filter;
   }
-  return true;
+  return kOatUpToDate;
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
@@ -628,12 +507,12 @@
   }
   const std::string& input_file_name = *input_file;
 
-  if (OatFileName() == nullptr) {
+  if (oat_.Filename() == nullptr) {
     *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return kUpdateNotAttempted;
   }
-  const std::string& oat_file_name = *OatFileName();
+  const std::string& oat_file_name = *oat_.Filename();
 
   const ImageInfo* image_info = GetImageInfo();
   Runtime* runtime = Runtime::Current();
@@ -665,12 +544,12 @@
   }
 
   // Mark that the oat file has changed and we should try to reload.
-  ClearOatFileCache();
+  oat_.Reset();
   return kUpdateSucceeded;
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::GenerateOatFile(CompilerFilter::Filter target, std::string* error_msg) {
+OatFileAssistant::GenerateOatFile(std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
@@ -680,12 +559,12 @@
     return kUpdateNotAttempted;
   }
 
-  if (OatFileName() == nullptr) {
+  if (oat_.Filename() == nullptr) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return kUpdateNotAttempted;
   }
-  const std::string& oat_file_name = *OatFileName();
+  const std::string& oat_file_name = *oat_.Filename();
 
   // dex2oat ignores missing dex files and doesn't report an error.
   // Check explicitly here so we can detect the error properly.
@@ -714,7 +593,6 @@
   args.push_back("--dex-file=" + dex_location_);
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
-  args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(target));
 
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
@@ -731,7 +609,7 @@
   }
 
   // Mark that the oat file has changed and we should try to reload.
-  ClearOatFileCache();
+  oat_.Reset();
   return kUpdateSucceeded;
 }
 
@@ -786,8 +664,10 @@
   return Exec(argv, error_msg);
 }
 
-bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location,
-    InstructionSet isa, std::string* odex_filename, std::string* error_msg) {
+bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location,
+                                                 InstructionSet isa,
+                                                 std::string* odex_filename,
+                                                 std::string* error_msg) {
   CHECK(odex_filename != nullptr);
   CHECK(error_msg != nullptr);
 
@@ -826,22 +706,23 @@
   return true;
 }
 
-std::string OatFileAssistant::DalvikCacheDirectory() {
-  // Note: We don't cache this, because it will only be called once by
-  // OatFileName.
+bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
+                                                InstructionSet isa,
+                                                std::string* oat_filename,
+                                                std::string* error_msg) {
+  CHECK(oat_filename != nullptr);
+  CHECK(error_msg != nullptr);
 
-  // TODO: The work done in GetDalvikCache is overkill for what we need.
-  // Ideally a new API for getting the DalvikCacheDirectory the way we want
-  // (without existence testing, creation, or death) is provided with the rest
-  // of the GetDalvikCache family of functions. Until such an API is in place,
-  // we use GetDalvikCache to avoid duplicating the logic for determining the
-  // dalvik cache directory.
-  std::string result;
-  bool have_android_data;
-  bool dalvik_cache_exists;
-  bool is_global_cache;
-  GetDalvikCache("", false, &result, &have_android_data, &dalvik_cache_exists, &is_global_cache);
-  return result;
+  std::string cache_dir = GetDalvikCache(GetInstructionSetString(isa));
+  if (cache_dir.empty()) {
+    *error_msg = "Dalvik cache directory does not exist";
+    return false;
+  }
+
+  // TODO: The oat file assistant should be the definitive place for
+  // determining the oat file name from the dex location, not
+  // GetDalvikCacheFilename.
+  return GetDalvikCacheFilename(location.c_str(), cache_dir.c_str(), oat_filename, error_msg);
 }
 
 std::string OatFileAssistant::ImageLocation() {
@@ -869,10 +750,10 @@
       has_original_dex_files_ = false;
 
       // Get the checksum from the odex if we can.
-      const OatFile* odex_file = GetOdexFile();
+      const OatFile* odex_file = odex_.GetFile();
       if (odex_file != nullptr) {
-        const OatFile::OatDexFile* odex_dex_file = odex_file->GetOatDexFile(
-            dex_location_.c_str(), nullptr, false);
+        const OatFile::OatDexFile* odex_dex_file
+            = odex_file->GetOatDexFile(dex_location_.c_str(), nullptr);
         if (odex_dex_file != nullptr) {
           cached_required_dex_checksum_ = odex_dex_file->GetDexFileLocationChecksum();
           required_dex_checksum_found_ = true;
@@ -883,88 +764,6 @@
   return required_dex_checksum_found_ ? &cached_required_dex_checksum_ : nullptr;
 }
 
-const OatFile* OatFileAssistant::GetOdexFile() {
-  CHECK(!oat_file_released_) << "OdexFile called after oat file released.";
-  if (!odex_file_load_attempted_) {
-    odex_file_load_attempted_ = true;
-    if (OdexFileName() != nullptr) {
-      const std::string& odex_file_name = *OdexFileName();
-      std::string error_msg;
-      cached_odex_file_.reset(OatFile::Open(odex_file_name.c_str(),
-                                            odex_file_name.c_str(),
-                                            nullptr,
-                                            nullptr,
-                                            load_executable_,
-                                            /*low_4gb*/false,
-                                            dex_location_.c_str(),
-                                            &error_msg));
-      if (cached_odex_file_.get() == nullptr) {
-        VLOG(oat) << "OatFileAssistant test for existing pre-compiled oat file "
-          << odex_file_name << ": " << error_msg;
-      }
-    }
-  }
-  return cached_odex_file_.get();
-}
-
-bool OatFileAssistant::OdexFileIsExecutable() {
-  const OatFile* odex_file = GetOdexFile();
-  return (odex_file != nullptr && odex_file->IsExecutable());
-}
-
-bool OatFileAssistant::OdexFileHasPatchInfo() {
-  const OatFile* odex_file = GetOdexFile();
-  return (odex_file != nullptr && odex_file->HasPatchInfo());
-}
-
-void OatFileAssistant::ClearOdexFileCache() {
-  odex_file_load_attempted_ = false;
-  cached_odex_file_.reset();
-  odex_file_is_out_of_date_attempted_ = false;
-  odex_file_is_up_to_date_attempted_ = false;
-}
-
-const OatFile* OatFileAssistant::GetOatFile() {
-  CHECK(!oat_file_released_) << "OatFile called after oat file released.";
-  if (!oat_file_load_attempted_) {
-    oat_file_load_attempted_ = true;
-    if (OatFileName() != nullptr) {
-      const std::string& oat_file_name = *OatFileName();
-      std::string error_msg;
-      cached_oat_file_.reset(OatFile::Open(oat_file_name.c_str(),
-                                           oat_file_name.c_str(),
-                                           nullptr,
-                                           nullptr,
-                                           load_executable_,
-                                           /*low_4gb*/false,
-                                           dex_location_.c_str(),
-                                           &error_msg));
-      if (cached_oat_file_.get() == nullptr) {
-        VLOG(oat) << "OatFileAssistant test for existing oat file "
-          << oat_file_name << ": " << error_msg;
-      }
-    }
-  }
-  return cached_oat_file_.get();
-}
-
-bool OatFileAssistant::OatFileIsExecutable() {
-  const OatFile* oat_file = GetOatFile();
-  return (oat_file != nullptr && oat_file->IsExecutable());
-}
-
-bool OatFileAssistant::OatFileHasPatchInfo() {
-  const OatFile* oat_file = GetOatFile();
-  return (oat_file != nullptr && oat_file->HasPatchInfo());
-}
-
-void OatFileAssistant::ClearOatFileCache() {
-  oat_file_load_attempted_ = false;
-  cached_oat_file_.reset();
-  oat_file_is_out_of_date_attempted_ = false;
-  oat_file_is_up_to_date_attempted_ = false;
-}
-
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
   if (!image_info_load_attempted_) {
     image_info_load_attempted_ = true;
@@ -981,8 +780,12 @@
             image_header.GetOatDataBegin());
         cached_image_info_.patch_delta = image_header.GetPatchDelta();
       } else {
+        std::string error_msg;
         std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeaderOrDie(cached_image_info_.location.c_str(), isa_));
+            gc::space::ImageSpace::ReadImageHeader(cached_image_info_.location.c_str(),
+                                                   isa_,
+                                                   &error_msg));
+        CHECK(image_header != nullptr) << error_msg;
         cached_image_info_.oat_checksum = image_header->GetOatChecksum();
         cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
             image_header->GetOatDataBegin());
@@ -1008,8 +811,10 @@
   } else {
     for (gc::space::ImageSpace* image_space : image_spaces) {
       std::string location = image_space->GetImageLocation();
+      std::string error_msg;
       std::unique_ptr<ImageHeader> image_header(
-          gc::space::ImageSpace::ReadImageHeaderOrDie(location.c_str(), isa));
+          gc::space::ImageSpace::ReadImageHeader(location.c_str(), isa, &error_msg));
+      CHECK(image_header != nullptr) << error_msg;
       checksum ^= image_header->GetOatChecksum();
     }
   }
@@ -1023,7 +828,7 @@
   return combined_image_checksum_;
 }
 
-gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
+std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
   DCHECK(oat_file != nullptr);
   std::string art_file = ArtFileName(oat_file);
   if (art_file.empty()) {
@@ -1031,14 +836,123 @@
   }
   std::string error_msg;
   ScopedObjectAccess soa(Thread::Current());
-  gc::space::ImageSpace* ret = gc::space::ImageSpace::CreateFromAppImage(art_file.c_str(),
-                                                                         oat_file,
-                                                                         &error_msg);
+  std::unique_ptr<gc::space::ImageSpace> ret =
+      gc::space::ImageSpace::CreateFromAppImage(art_file.c_str(), oat_file, &error_msg);
   if (ret == nullptr && (VLOG_IS_ON(image) || OS::FileExists(art_file.c_str()))) {
     LOG(INFO) << "Failed to open app image " << art_file.c_str() << " " << error_msg;
   }
   return ret;
 }
 
+OatFileAssistant::OatFileInfo::OatFileInfo(OatFileAssistant* oat_file_assistant)
+  : oat_file_assistant_(oat_file_assistant)
+{}
+
+const std::string* OatFileAssistant::OatFileInfo::Filename() {
+  return filename_provided_ ? &filename_ : nullptr;
+}
+
+bool OatFileAssistant::OatFileInfo::Exists() {
+  return GetFile() != nullptr;
+}
+
+OatFileAssistant::OatStatus OatFileAssistant::OatFileInfo::Status() {
+  if (!status_attempted_) {
+    status_attempted_ = true;
+    const OatFile* file = GetFile();
+    if (file == nullptr) {
+      status_ = kOatOutOfDate;
+    } else {
+      status_ = oat_file_assistant_->GivenOatFileStatus(*file);
+      VLOG(oat) << file->GetLocation() << " is " << status_
+          << " with filter " << file->GetCompilerFilter();
+    }
+  }
+  return status_;
+}
+
+bool OatFileAssistant::OatFileInfo::IsOutOfDate() {
+  return Status() == kOatOutOfDate;
+}
+
+bool OatFileAssistant::OatFileInfo::NeedsRelocation() {
+  return Status() == kOatNeedsRelocation;
+}
+
+bool OatFileAssistant::OatFileInfo::IsUpToDate() {
+  return Status() == kOatUpToDate;
+}
+
+CompilerFilter::Filter OatFileAssistant::OatFileInfo::CompilerFilter() {
+  const OatFile* file = GetFile();
+  CHECK(file != nullptr);
+  return file->GetCompilerFilter();
+}
+
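+// Lazily opens and caches the oat file. Returns null if no filename was provided or the file
+// could not be opened. Must not be called after ReleaseFile().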
+const OatFile* OatFileAssistant::OatFileInfo::GetFile() {
+  CHECK(!file_released_) << "GetFile called after oat file released.";
+  if (!load_attempted_) {
+    load_attempted_ = true;
+    if (filename_provided_) {
+      std::string error_msg;
+      file_.reset(OatFile::Open(filename_.c_str(),
+                                filename_.c_str(),
+                                nullptr,
+                                nullptr,
+                                oat_file_assistant_->load_executable_,
+                                /*low_4gb*/false,
+                                oat_file_assistant_->dex_location_.c_str(),
+                                &error_msg));
+      if (file_.get() == nullptr) {
+        VLOG(oat) << "OatFileAssistant test for existing oat file "
+          << filename_ << ": " << error_msg;
+      }
+    }
+  }
+  return file_.get();
+}
+
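+// Returns true if the file exists and was compiled with a filter at least as good as 'target'.
+// A recently changed profile invalidates files whose compiler filter depends on profile data.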
+bool OatFileAssistant::OatFileInfo::CompilerFilterIsOkay(
+    CompilerFilter::Filter target, bool profile_changed) {
+  const OatFile* file = GetFile();
+  if (file == nullptr) {
+    return false;
+  }
+
+  CompilerFilter::Filter current = file->GetCompilerFilter();
+  if (profile_changed && CompilerFilter::DependsOnProfile(current)) {
+    VLOG(oat) << "Compiler filter not okay because Profile changed";
+    return false;
+  }
+  return CompilerFilter::IsAsGoodAs(current, target);
+}
+
+bool OatFileAssistant::OatFileInfo::IsExecutable() {
+  const OatFile* file = GetFile();
+  return (file != nullptr && file->IsExecutable());
+}
+
+bool OatFileAssistant::OatFileInfo::HasPatchInfo() {
+  const OatFile* file = GetFile();
+  return (file != nullptr && file->HasPatchInfo());
+}
+
+void OatFileAssistant::OatFileInfo::Reset() {
+  load_attempted_ = false;
+  file_.reset();
+  status_attempted_ = false;
+}
+
+void OatFileAssistant::OatFileInfo::Reset(const std::string& filename) {
+  filename_provided_ = true;
+  filename_ = filename;
+  Reset();
+}
+
+std::unique_ptr<OatFile> OatFileAssistant::OatFileInfo::ReleaseFile() {
+  file_released_ = true;
+  return std::move(file_);
+}
+
 }  // namespace art
 
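The OatFileInfo methods above follow a load-once, cache-the-result pattern: GetFile() only attempts to open the oat file on the first call, Status() memoizes the computed status, and Reset() clears both so the next query reloads. A minimal standalone sketch of that pattern, using hypothetical names detached from the ART types:

#include <memory>
#include <string>

// Illustrative stand-in for OatFileInfo's caching; CachedFileInfo and Load()
// are not ART classes.
class CachedFileInfo {
 public:
  // Loads at most once; a failed load (nullptr) is cached as well.
  const std::string* Get() {
    if (!load_attempted_) {
      load_attempted_ = true;
      value_ = Load();
    }
    return value_.get();
  }

  // Clears cached state so the next Get() reloads, like OatFileInfo::Reset().
  void Reset() {
    load_attempted_ = false;
    value_.reset();
  }

 private:
  // Placeholder for the expensive open; the real class calls OatFile::Open.
  static std::unique_ptr<std::string> Load() { return nullptr; }

  bool load_attempted_ = false;
  std::unique_ptr<std::string> value_;
};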
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 63d5022..3f018dc 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -28,7 +28,6 @@
 #include "compiler_filter.h"
 #include "oat_file.h"
 #include "os.h"
-#include "profiler.h"
 
 namespace art {
 
@@ -102,14 +101,10 @@
   // device. For example, on an arm device, use arm or arm64. An oat file can
   // be loaded executable only if the ISA matches the current runtime.
   //
-  // profile_changed should be true if the profile has recently changed
-  // for this dex location.
-  //
   // load_executable should be true if the caller intends to try and load
   // executable code for this dex location.
   OatFileAssistant(const char* dex_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   // Constructs an OatFileAssistant, providing an explicit target oat_location
@@ -117,7 +112,6 @@
   OatFileAssistant(const char* dex_location,
                    const char* oat_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   ~OatFileAssistant();
@@ -146,8 +140,10 @@
 
   // Return what action needs to be taken to produce up-to-date code for this
   // dex location that is at least as good as an oat file generated with the
-  // given compiler filter.
-  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
+  // given compiler filter. profile_changed should be true to indicate the
+  // profile has recently changed for this dex location.
+  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
+                               bool profile_changed = false);
 
   // Returns true if there is up-to-date code for this dex location,
   // irrespective of the compiler filter of the up-to-date code.
@@ -164,15 +160,16 @@
   };
 
   // Attempts to generate or relocate the oat file as needed to make it up to
-  // date with in a way that is at least as good as an oat file generated with
-  // the given compiler filter.
+  // date based on the current runtime and compiler options.
+  // profile_changed should be true to indicate the profile has recently
+  // changed for this dex location.
+  //
   // Returns the result of attempting to update the code.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate MakeUpToDate(CompilerFilter::Filter target_compiler_filter,
-                                       std::string* error_msg);
+  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed, std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -183,7 +180,7 @@
   std::unique_ptr<OatFile> GetBestOatFile();
 
   // Open and returns an image space associated with the oat file.
-  gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
+  static std::unique_ptr<gc::space::ImageSpace> OpenImageSpace(const OatFile* oat_file);
 
   // Loads the dex files in the given oat file for the given dex location.
   // The oat file should be up to date for the given dex location.
@@ -242,15 +239,9 @@
   // |OatFileExists() == true|.
   CompilerFilter::Filter OatFileCompilerFilter();
 
-  // Return image file name. Does not cache since it relies on the oat file.
-  std::string ArtFileName(const OatFile* oat_file) const;
-
-  // These methods return the status for a given opened oat file with respect
-  // to the dex location.
+  // Return the status for a given opened oat file with respect to the dex
+  // location.
   OatStatus GivenOatFileStatus(const OatFile& file);
-  bool GivenOatFileIsOutOfDate(const OatFile& file);
-  bool GivenOatFileNeedsRelocation(const OatFile& file);
-  bool GivenOatFileIsUpToDate(const OatFile& file);
 
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
@@ -261,14 +252,15 @@
   // attempted. error_msg must not be null.
   ResultOfAttemptToUpdate RelocateOatFile(const std::string* input_file, std::string* error_msg);
 
-  // Generate the oat file from the dex file using the given compiler filter.
+  // Generate the oat file from the dex file using the current runtime
+  // compiler options.
   // This does not check the current status before attempting to generate the
   // oat file.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate GenerateOatFile(CompilerFilter::Filter filter, std::string* error_msg);
+  ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg);
 
   // Executes dex2oat using the current runtime configuration overridden with
   // the given arguments. This does not check to see if dex2oat is enabled in
@@ -285,10 +277,24 @@
   // Constructs the odex file name for the given dex location.
   // Returns true on success, in which case odex_filename is set to the odex
   // file name.
-  // Returns false on error, in which case error_msg describes the error.
+  // Returns false on error, in which case error_msg describes the error and
+  // odex_filename is not changed.
   // Neither odex_filename nor error_msg may be null.
-  static bool DexFilenameToOdexFilename(const std::string& location,
-      InstructionSet isa, std::string* odex_filename, std::string* error_msg);
+  static bool DexLocationToOdexFilename(const std::string& location,
+                                        InstructionSet isa,
+                                        std::string* odex_filename,
+                                        std::string* error_msg);
+
+  // Constructs the oat file name for the given dex location.
+  // Returns true on success, in which case oat_filename is set to the oat
+  // file name.
+  // Returns false on error, in which case error_msg describes the error and
+  // oat_filename is not changed.
+  // Neither oat_filename nor error_msg may be null.
+  static bool DexLocationToOatFilename(const std::string& location,
+                                       InstructionSet isa,
+                                       std::string* oat_filename,
+                                       std::string* error_msg);
 
   static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
 
@@ -300,11 +306,73 @@
     std::string location;
   };
 
-  // Returns the path to the dalvik cache directory.
-  // Does not check existence of the cache or try to create it.
-  // Includes the trailing slash.
-  // Returns an empty string if we can't get the dalvik cache directory path.
-  std::string DalvikCacheDirectory();
+  class OatFileInfo {
+   public:
+    // Initially the info is for no file in particular. It treats the file
+    // as out of date until Reset is called with the filename whose
+    // information should be cached.
+    explicit OatFileInfo(OatFileAssistant* oat_file_assistant);
+
+    const std::string* Filename();
+    bool Exists();
+    OatStatus Status();
+    bool IsOutOfDate();
+    bool NeedsRelocation();
+    bool IsUpToDate();
+    // Must only be called if the associated file exists, i.e., if
+    // |Exists() == true|.
+    CompilerFilter::Filter CompilerFilter();
+
+    // Returns the loaded file.
+    // Loads the file if needed. Returns null if the file failed to load.
+    // The caller shouldn't clean up or free the returned pointer.
+    const OatFile* GetFile();
+
+    // Returns true if the compiler filter used to generate the file is at
+    // least as good as the given target filter. profile_changed should be
+    // true to indicate the profile has recently changed for this dex
+    // location.
+    bool CompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
+
+    // Returns true if the file is opened executable.
+    bool IsExecutable();
+
+    // Returns true if the file has patch info required to run patchoat.
+    bool HasPatchInfo();
+
+    // Clear any cached information about the file that depends on the
+    // contents of the file. This does not reset the provided filename.
+    void Reset();
+
+    // Clear any cached information and switch to getting info about the oat
+    // file with the given filename.
+    void Reset(const std::string& filename);
+
+    // Release the loaded oat file.
+    // Returns null if the oat file hasn't been loaded.
+    //
+    // After this call, no other methods of the OatFileInfo should be
+    // called, because access to the loaded oat file has been taken away from
+    // the OatFileInfo object.
+    std::unique_ptr<OatFile> ReleaseFile();
+
+   private:
+    OatFileAssistant* oat_file_assistant_;
+
+    bool filename_provided_ = false;
+    std::string filename_;
+
+    bool load_attempted_ = false;
+    std::unique_ptr<OatFile> file_;
+
+    bool status_attempted_ = false;
+    OatStatus status_;
+
+    // For debugging only.
+    // If this flag is set, the file has been released to the user and the
+    // OatFileInfo object is in a bad state and should no longer be used.
+    bool file_released_ = false;
+  };
 
   // Returns the current image location.
   // Returns an empty string if the image location could not be retrieved.
@@ -321,44 +389,6 @@
   // found for the dex_location_ dex file.
   const uint32_t* GetRequiredDexChecksum();
 
-  // Returns the loaded odex file.
-  // Loads the file if needed. Returns null if the file failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  const OatFile* GetOdexFile();
-
-  // Returns true if the compiler filter used to generate the odex file is at
-  // least as good as the given target filter.
-  bool OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target);
-
-  // Returns true if the odex file is opened executable.
-  bool OdexFileIsExecutable();
-
-  // Returns true if the odex file has patch info required to run patchoat.
-  bool OdexFileHasPatchInfo();
-
-  // Clear any cached information about the odex file that depends on the
-  // contents of the file.
-  void ClearOdexFileCache();
-
-  // Returns the loaded oat file.
-  // Loads the file if needed. Returns null if the file failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  const OatFile* GetOatFile();
-
-  // Returns true if the compiler filter used to generate the oat file is at
-  // least as good as the given target filter.
-  bool OatFileCompilerFilterIsOkay(CompilerFilter::Filter target);
-
-  // Returns true if the oat file is opened executable.
-  bool OatFileIsExecutable();
-
-  // Returns true if the oat file has patch info required to run patchoat.
-  bool OatFileHasPatchInfo();
-
-  // Clear any cached information about the oat file that depends on the
-  // contents of the file.
-  void ClearOatFileCache();
-
   // Returns the loaded image info.
   // Loads the image info if needed. Returns null if the image info failed
   // to load.
@@ -378,9 +408,6 @@
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
 
-  // Whether the profile has recently changed.
-  bool profile_changed_ = false;
-
   // Whether we will attempt to load oat files executable.
   bool load_executable_ = false;
 
@@ -391,45 +418,8 @@
   bool required_dex_checksum_found_;
   bool has_original_dex_files_;
 
-  // Cached value of the odex file name.
-  // This should be accessed only by the OdexFileName() method.
-  bool cached_odex_file_name_attempted_ = false;
-  bool cached_odex_file_name_found_;
-  std::string cached_odex_file_name_;
-
-  // Cached value of the loaded odex file.
-  // Use the GetOdexFile method rather than accessing this directly, unless you
-  // know the odex file isn't out of date.
-  bool odex_file_load_attempted_ = false;
-  std::unique_ptr<OatFile> cached_odex_file_;
-
-  // Cached results for OdexFileIsOutOfDate
-  bool odex_file_is_out_of_date_attempted_ = false;
-  bool cached_odex_file_is_out_of_date_;
-
-  // Cached results for OdexFileIsUpToDate
-  bool odex_file_is_up_to_date_attempted_ = false;
-  bool cached_odex_file_is_up_to_date_;
-
-  // Cached value of the oat file name.
-  // This should be accessed only by the OatFileName() method.
-  bool cached_oat_file_name_attempted_ = false;
-  bool cached_oat_file_name_found_;
-  std::string cached_oat_file_name_;
-
-  // Cached value of the loaded oat file.
-  // Use the GetOatFile method rather than accessing this directly, unless you
-  // know the oat file isn't out of date.
-  bool oat_file_load_attempted_ = false;
-  std::unique_ptr<OatFile> cached_oat_file_;
-
-  // Cached results for OatFileIsOutOfDate
-  bool oat_file_is_out_of_date_attempted_ = false;
-  bool cached_oat_file_is_out_of_date_;
-
-  // Cached results for OatFileIsUpToDate
-  bool oat_file_is_up_to_date_attempted_ = false;
-  bool cached_oat_file_is_up_to_date_;
+  OatFileInfo odex_;
+  OatFileInfo oat_;
 
   // Cached value of the image info.
   // Use the GetImageInfo method rather than accessing these directly.
@@ -440,12 +430,6 @@
   ImageInfo cached_image_info_;
   uint32_t combined_image_checksum_ = 0;
 
-  // For debugging only.
-  // If this flag is set, the oat or odex file has been released to the user
-  // of the OatFileAssistant object and the OatFileAssistant object is in a
-  // bad state and should no longer be used.
-  bool oat_file_released_ = false;
-
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
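With this header change, profile_changed moves from the constructor to the individual queries, and MakeUpToDate derives its target compiler filter from the runtime's compiler options instead of taking one as a parameter. A hedged caller-side sketch of the resulting flow; the dex path is hypothetical and error handling is trimmed:

// Illustrative driver; the calls match the declarations above.
static std::unique_ptr<OatFile> LoadWithAssistant() {
  OatFileAssistant assistant("/data/app/example/base.apk",
                             kRuntimeISA,
                             /*load_executable*/ true);

  // profile_changed is now supplied per query rather than at construction.
  if (assistant.GetDexOptNeeded(CompilerFilter::kSpeed,
                                /*profile_changed*/ false) !=
      OatFileAssistant::kNoDexOptNeeded) {
    std::string error_msg;
    // MakeUpToDate takes its target filter from the runtime's
    // --compiler-filter option; failure is tolerated here (best effort).
    if (assistant.MakeUpToDate(/*profile_changed*/ false, &error_msg) !=
        OatFileAssistant::kUpdateSucceeded) {
      LOG(WARNING) << error_msg;
    }
  }
  return assistant.GetBestOatFile();
}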
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 156d394..05c5a22 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -213,22 +213,22 @@
 // generation of oat files.
 static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
   // Use an oat file assistant to find the proper oat location.
-  OatFileAssistant ofa(dex_location, kRuntimeISA, false, false);
-  const std::string* oat_location = ofa.OatFileName();
-  ASSERT_TRUE(oat_location != nullptr);
+  std::string oat_location;
+  std::string error_msg;
+  ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
+        dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
 
   std::vector<std::string> args;
   args.push_back("--dex-file=" + std::string(dex_location));
-  args.push_back("--oat-file=" + *oat_location);
+  args.push_back("--oat-file=" + oat_location);
   args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
   args.push_back("--runtime-arg");
   args.push_back("-Xnorelocate");
-  std::string error_msg;
   ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
 
   // Verify the oat file was generated as expected.
-  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location->c_str(),
-                                                  oat_location->c_str(),
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(),
+                                                  oat_location.c_str(),
                                                   nullptr,
                                                   nullptr,
                                                   false,
@@ -245,7 +245,7 @@
   std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -275,7 +275,7 @@
 TEST_F(OatFileAssistantTest, NoDexNoOat) {
   std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -283,8 +283,7 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -298,7 +297,7 @@
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -321,19 +320,52 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
+// Case: We have a DEX file and ODEX file for a different dex location.
+// Expect: The status is kDex2OatNeeded.
+TEST_F(OatFileAssistantTest, OatForDifferentDex) {
+  // Generate an odex file for OatForDifferentDex_A.jar
+  std::string dex_location_a = GetScratchDir() + "/OatForDifferentDex_A.jar";
+  std::string odex_location = GetOdexDir() + "/OatForDifferentDex.odex";
+  Copy(GetDexSrc1(), dex_location_a);
+  GenerateOdexForTest(dex_location_a, odex_location, CompilerFilter::kSpeed);
+
+  // Try to use that odex file for OatForDifferentDex.jar
+  std::string dex_location = GetScratchDir() + "/OatForDifferentDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileExists());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+}
+
 // Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDexOptNeeded if the profile hasn't changed.
+// Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
+// kDex2OatNeeded if the profile has changed.
 TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
   std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -347,32 +379,6 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
-// Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDex2OatNeeded if the profile has changed.
-TEST_F(OatFileAssistantTest, ProfileOatOutOfDate) {
-  std::string dex_location = GetScratchDir() + "/ProfileOatOutOfDate.jar";
-  Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
-
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true, false);
-
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
@@ -380,9 +386,9 @@
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load both dex files.
@@ -407,9 +413,9 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -436,7 +442,7 @@
   // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
                                       oat_location.c_str(),
-                                      kRuntimeISA, false, true);
+                                      kRuntimeISA, true);
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -456,7 +462,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -483,7 +489,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -519,7 +525,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -535,8 +541,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -577,7 +584,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -598,8 +605,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -634,7 +642,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -655,8 +663,9 @@
 
   // Make the oat file up to date. This should have no effect.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -684,7 +693,7 @@
   GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
@@ -706,8 +715,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -743,15 +753,16 @@
   GenerateNoPatchOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   // Make the oat file up to date.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -781,7 +792,7 @@
 
   // Verify things don't go bad.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -816,7 +827,7 @@
   GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -844,7 +855,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -870,7 +881,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -889,7 +900,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -908,7 +919,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -928,10 +939,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -943,7 +955,7 @@
   EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
 
   // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
@@ -959,10 +971,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() == nullptr);
@@ -975,10 +988,11 @@
   std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.GenerateOatFile(CompilerFilter::kSpeed, &error_msg));
+      oat_file_assistant.GenerateOatFile(&error_msg));
 }
 
 // Turn an absolute path into a path relative to the current working
@@ -1024,7 +1038,7 @@
   Copy(GetDexSrc1(), abs_dex_location);
 
   std::string dex_location = MakePathRelative(abs_dex_location);
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -1042,7 +1056,7 @@
 TEST_F(OatFileAssistantTest, ShortDexLocation) {
   std::string dex_location = "/xx";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1057,8 +1071,9 @@
 
   // Trying to make it up to date should have no effect.
   std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -1068,7 +1083,7 @@
   std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -1165,7 +1180,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1187,7 +1202,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1197,21 +1212,49 @@
   EXPECT_EQ(2u, dex_files.size());
 }
 
-TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
+TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) {
+  std::string dex_location = GetScratchDir() + "/RuntimeCompilerFilterOptionUsed.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
+  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
+}
+
+TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) {
   std::string error_msg;
   std::string odex_file;
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "nopath.jar", kArm, &odex_file, &error_msg));
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
 }
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 4abf230..2e67ffe 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -39,8 +39,6 @@
 // If true, then we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
-CompilerFilter::Filter OatFileManager::filter_ = CompilerFilter::Filter::kSpeed;
-
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -502,6 +500,8 @@
     return false;
   }
 
+  ScopedTrace st("Collision check");
+
   // Add dex files from the oat file to check.
   AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue, &opened_dex_files);
 
@@ -558,7 +558,6 @@
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
                                       kRuntimeISA,
-                                      /*profile_changed*/false,
                                       !runtime->IsAotCompiler());
 
   // Lock the target oat location to avoid races generating and loading the
@@ -573,9 +572,10 @@
   const OatFile* source_oat_file = nullptr;
 
   if (!oat_file_assistant.IsUpToDate()) {
-    // Update the oat file on disk if we can. This may fail, but that's okay.
-    // Best effort is all that matters here.
-    switch (oat_file_assistant.MakeUpToDate(filter_, /*out*/ &error_msg)) {
+    // Update the oat file on disk if we can, based on the --compiler-filter
+    // option derived from the current runtime options.
+    // This may fail, but that's okay. Best effort is all that matters here.
+    switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/false, /*out*/ &error_msg)) {
       case OatFileAssistant::kUpdateFailed:
         LOG(WARNING) << error_msg;
         break;
@@ -632,8 +632,8 @@
   if (source_oat_file != nullptr) {
     bool added_image_space = false;
     if (source_oat_file->IsExecutable()) {
-      std::unique_ptr<gc::space::ImageSpace> image_space(
-          kEnableAppImage ? oat_file_assistant.OpenImageSpace(source_oat_file) : nullptr);
+      std::unique_ptr<gc::space::ImageSpace> image_space =
+          kEnableAppImage ? oat_file_assistant.OpenImageSpace(source_oat_file) : nullptr;
       if (image_space != nullptr) {
         ScopedObjectAccess soa(self);
         StackHandleScope<1> hs(self);
@@ -695,7 +695,9 @@
   if (dex_files.empty()) {
     if (oat_file_assistant.HasOriginalDexFiles()) {
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
+        static constexpr bool kVerifyChecksum = true;
+        if (!DexFile::Open(
+            dex_location, dex_location, kVerifyChecksum, /*out*/ &error_msg, &dex_files)) {
           LOG(WARNING) << error_msg;
           error_msgs->push_back("Failed to open dex files from " + std::string(dex_location)
                                 + " because: " + error_msg);
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 1d5b872..45ac4b7 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -25,7 +25,6 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "compiler_filter.h"
 #include "jni.h"
 
 namespace art {
@@ -106,10 +105,6 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  static void SetCompilerFilter(CompilerFilter::Filter filter) {
-    filter_ = filter;
-  }
-
  private:
   // Check that the shared libraries in the given oat file match those in the given class loader and
   // dex elements. If the class loader is null or we do not support one of the class loaders in the
@@ -129,9 +124,6 @@
   std::set<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
   bool have_non_pic_oat_file_;
 
-  // The compiler filter used for oat files loaded by the oat file manager.
-  static CompilerFilter::Filter filter_;
-
   DISALLOW_COPY_AND_ASSIGN(OatFileManager);
 };
 
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
index f7accc0..b8754a4 100644
--- a/runtime/object_lock.cc
+++ b/runtime/object_lock.cc
@@ -47,7 +47,22 @@
   obj_->NotifyAll(self_);
 }
 
+template <typename T>
+ObjectTryLock<T>::ObjectTryLock(Thread* self, Handle<T> object) : self_(self), obj_(object) {
+  CHECK(object.Get() != nullptr);
+  acquired_ = obj_->MonitorTryEnter(self_) != nullptr;
+}
+
+template <typename T>
+ObjectTryLock<T>::~ObjectTryLock() {
+  if (acquired_) {
+    obj_->MonitorExit(self_);
+  }
+}
+
 template class ObjectLock<mirror::Class>;
 template class ObjectLock<mirror::Object>;
+template class ObjectTryLock<mirror::Class>;
+template class ObjectTryLock<mirror::Object>;
 
 }  // namespace art
diff --git a/runtime/object_lock.h b/runtime/object_lock.h
index eb7cbd8..7f02b37 100644
--- a/runtime/object_lock.h
+++ b/runtime/object_lock.h
@@ -45,6 +45,27 @@
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
 
+template <typename T>
+class ObjectTryLock {
+ public:
+  ObjectTryLock(Thread* self, Handle<T> object) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ~ObjectTryLock() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool Acquired() const {
+    return acquired_;
+  }
+
+ private:
+  Thread* const self_;
+  Handle<T> const obj_;
+  bool acquired_;
+
+
+  DISALLOW_COPY_AND_ASSIGN(ObjectTryLock);
+};
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OBJECT_LOCK_H_
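ObjectTryLock mirrors ObjectLock but never blocks: the constructor attempts MonitorTryEnter, Acquired() reports whether the monitor was taken, and the destructor releases it only in that case. A small usage sketch, assuming an existing Thread* and Handle<mirror::Object>; the helper it calls is illustrative:

static void TryDoLockedWork(Thread* self, Handle<mirror::Object> h_obj)
    SHARED_REQUIRES(Locks::mutator_lock_) {
  ObjectTryLock<mirror::Object> try_lock(self, h_obj);
  if (try_lock.Acquired()) {
    // The monitor is held; it is released when try_lock goes out of scope.
    DoWorkWhileLocked(h_obj);  // Hypothetical helper, not an ART function.
  } else {
    // The monitor was contended; skip the work or retry without blocking.
  }
}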
diff --git a/runtime/offsets.h b/runtime/offsets.h
index 9d5063f..aaf5c0c 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -19,6 +19,7 @@
 
 #include <ostream>
 
+#include "base/enums.h"
 #include "globals.h"
 
 namespace art {
@@ -51,12 +52,15 @@
 };
 
 // Offsets relative to the current running thread.
-template<size_t pointer_size>
+template<PointerSize pointer_size>
 class ThreadOffset : public Offset {
  public:
   explicit ThreadOffset(size_t val) : Offset(val) {}
 };
 
+using ThreadOffset32 = ThreadOffset<PointerSize::k32>;
+using ThreadOffset64 = ThreadOffset<PointerSize::k64>;
+
 // Offsets relative to an object.
 class MemberOffset : public Offset {
  public:
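ThreadOffset is now parameterized on the PointerSize enum rather than a raw size_t, and the two common instantiations get aliases. A minimal illustration; the offsets themselves are hypothetical:

// The aliases simply fix the PointerSize template argument.
ThreadOffset32 field_offset_32(8);   // Offset of some thread field on 32-bit.
ThreadOffset64 field_offset_64(16);  // The same field when pointers are 64-bit.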
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index ba71dc3..54ec5d3 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -69,11 +69,15 @@
 /* posix open() with extensions; used by e.g. ZipFile */
 JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
     /*
-     * The call is expected to handle JVM_O_DELETE, which causes the file
-     * to be removed after it is opened.  Also, some code seems to
-     * want the special return value JVM_EEXIST if the file open fails
-     * due to O_EXCL.
+     * Some code seems to want the special return value JVM_EEXIST if the
+     * file open fails due to O_EXCL.
      */
+    // Don't use JVM_O_DELETE, it's problematic with FUSE, see b/28901232.
+    if (flags & JVM_O_DELETE) {
+        LOG(FATAL) << "JVM_O_DELETE option is not supported (while opening: '"
+                   << fname << "')";
+    }
+
     int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
     if (fd < 0) {
         int err = errno;
@@ -84,12 +88,6 @@
         }
     }
 
-    if (flags & JVM_O_DELETE) {
-        if (unlink(fname) != 0) {
-            LOG(WARNING) << "Post-open deletion of '" << fname << "' failed: " << strerror(errno);
-        }
-    }
-
     return fd;
 }
 
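With this change JVM_Open refuses JVM_O_DELETE outright (post-open unlink was problematic with FUSE, see b/28901232) while still mapping an exclusive-create failure to JVM_EEXIST. A hedged sketch of a caller relying on that mapping; the flags are plain POSIX open() flags, which this implementation forwards unchanged:

#include <fcntl.h>
#include <unistd.h>

// Illustrative exclusive create. JVM_Open returns JVM_EEXIST when O_EXCL
// creation fails because the file already exists, and a negative value for
// other errors.
static bool CreateLockFile(const char* path) {
  jint fd = JVM_Open(path, O_CREAT | O_EXCL | O_WRONLY, 0600);
  if (fd == JVM_EEXIST) {
    return false;  // Another process created it first.
  }
  if (fd < 0) {
    return false;  // Some other failure.
  }
  close(fd);
  return true;
}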
diff --git a/test/955-lambda-smali/build b/runtime/openjdkjvmti/Android.mk
old mode 100755
new mode 100644
similarity index 75%
copy from test/955-lambda-smali/build
copy to runtime/openjdkjvmti/Android.mk
index 14230c2..1de20e8
--- a/test/955-lambda-smali/build
+++ b/runtime/openjdkjvmti/Android.mk
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# make us exit on a failure
-set -e
+LOCAL_PATH := $(call my-dir)
 
-./default-build "$@" --experimental default-methods
+include $(CLEAR_VARS)
+LOCAL_MODULE := openjdkjvmti-phony
+include $(BUILD_PHONY_PACKAGE)
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to runtime/openjdkjvmti/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
diff --git a/runtime/openjdkjvmti/NOTICE b/runtime/openjdkjvmti/NOTICE
new file mode 100644
index 0000000..6ec62cd
--- /dev/null
+++ b/runtime/openjdkjvmti/NOTICE
@@ -0,0 +1,29 @@
+Copyright (C) 2016 The Android Open Source Project
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This file implements interfaces from the file jvmti.h. This implementation
+is licensed under the same terms as the file jvmti.h.  The
+copyright and license information for the file jvmti.h follows.
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
new file mode 100644
index 0000000..339c457
--- /dev/null
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -0,0 +1,1120 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+#include "openjdkjvmti/jvmti.h"
+
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jni_env_ext-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
+
+// TODO Remove this at some point by annotating all the methods. It was put in to make the skeleton
+// easier to create.
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+
+namespace openjdkjvmti {
+
+extern const jvmtiInterface_1 gJvmtiInterface;
+
+// A structure that is a jvmtiEnv with additional information for the runtime.
+struct ArtJvmTiEnv : public jvmtiEnv {
+  art::JavaVMExt* art_vm;
+  void* local_data;
+
+  explicit ArtJvmTiEnv(art::JavaVMExt* runtime) : art_vm(runtime), local_data(nullptr) {
+    functions = &gJvmtiInterface;
+  }
+};
+
+// Macro and constexpr to make error values less annoying to write.
+#define ERR(e) JVMTI_ERROR_ ## e
+static constexpr jvmtiError OK = JVMTI_ERROR_NONE;
+
+// Special error code for unimplemented functions in JVMTI
+static constexpr jvmtiError ERR(NOT_IMPLEMENTED) = JVMTI_ERROR_NOT_AVAILABLE;
+
+class JvmtiFunctions {
+ private:
+  static bool IsValidEnv(jvmtiEnv* env) {
+    return env != nullptr;
+  }
+
+ public:
+  static jvmtiError Allocate(jvmtiEnv* env, jlong size, unsigned char** mem_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (mem_ptr == nullptr) {
+      return ERR(NULL_POINTER);
+    }
+    if (size < 0) {
+      return ERR(ILLEGAL_ARGUMENT);
+    } else if (size == 0) {
+      *mem_ptr = nullptr;
+      return OK;
+    }
+    *mem_ptr = static_cast<unsigned char*>(malloc(size));
+    return (*mem_ptr != nullptr) ? OK : ERR(OUT_OF_MEMORY);
+  }
+
+  static jvmtiError Deallocate(jvmtiEnv* env, unsigned char* mem) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (mem != nullptr) {
+      free(mem);
+    }
+    return OK;
+  }
+
+  static jvmtiError GetThreadState(jvmtiEnv* env, jthread thread, jint* thread_state_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SuspendThreadList(jvmtiEnv* env,
+                                      jint request_count,
+                                      const jthread* request_list,
+                                      jvmtiError* results) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ResumeThreadList(jvmtiEnv* env,
+                                     jint request_count,
+                                     const jthread* request_list,
+                                     jvmtiError* results) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError StopThread(jvmtiEnv* env, jthread thread, jobject exception) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadInfo(jvmtiEnv* env, jthread thread, jvmtiThreadInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetOwnedMonitorInfo(jvmtiEnv* env,
+                                        jthread thread,
+                                        jint* owned_monitor_count_ptr,
+                                        jobject** owned_monitors_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetOwnedMonitorStackDepthInfo(jvmtiEnv* env,
+                                                  jthread thread,
+                                                  jint* monitor_info_count_ptr,
+                                                  jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
+                                               jthread thread,
+                                               jobject* monitor_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RunAgentThread(jvmtiEnv* env,
+                                   jthread thread,
+                                   jvmtiStartFunction proc,
+                                   const void* arg,
+                                   jint priority) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetThreadLocalStorage(jvmtiEnv* env, jthread thread, const void* data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadLocalStorage(jvmtiEnv* env, jthread thread, void** data_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTopThreadGroups(jvmtiEnv* env,
+                                       jint* group_count_ptr,
+                                       jthreadGroup** groups_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadGroupInfo(jvmtiEnv* env,
+                                       jthreadGroup group,
+                                       jvmtiThreadGroupInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadGroupChildren(jvmtiEnv* env,
+                                           jthreadGroup group,
+                                           jint* thread_count_ptr,
+                                           jthread** threads_ptr,
+                                           jint* group_count_ptr,
+                                           jthreadGroup** groups_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetStackTrace(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint start_depth,
+                                  jint max_frame_count,
+                                  jvmtiFrameInfo* frame_buffer,
+                                  jint* count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAllStackTraces(jvmtiEnv* env,
+                                      jint max_frame_count,
+                                      jvmtiStackInfo** stack_info_ptr,
+                                      jint* thread_count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadListStackTraces(jvmtiEnv* env,
+                                             jint thread_count,
+                                             const jthread* thread_list,
+                                             jint max_frame_count,
+                                             jvmtiStackInfo** stack_info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFrameCount(jvmtiEnv* env, jthread thread, jint* count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFrameLocation(jvmtiEnv* env,
+                                     jthread thread,
+                                     jint depth,
+                                     jmethodID* method_ptr,
+                                     jlocation* location_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError NotifyFramePop(jvmtiEnv* env, jthread thread, jint depth) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env, jthread thread, jobject value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env, jthread thread, jint value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env, jthread thread, jlong value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env, jthread thread, jfloat value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env, jthread thread, jdouble value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError FollowReferences(jvmtiEnv* env,
+                                     jint heap_filter,
+                                     jclass klass,
+                                     jobject initial_object,
+                                     const jvmtiHeapCallbacks* callbacks,
+                                     const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateThroughHeap(jvmtiEnv* env,
+                                       jint heap_filter,
+                                       jclass klass,
+                                       const jvmtiHeapCallbacks* callbacks,
+                                       const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTag(jvmtiEnv* env, jobject object, jlong* tag_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetTag(jvmtiEnv* env, jobject object, jlong tag) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectsWithTags(jvmtiEnv* env,
+                                       jint tag_count,
+                                       const jlong* tags,
+                                       jint* count_ptr,
+                                       jobject** object_result_ptr,
+                                       jlong** tag_result_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverObjectsReachableFromObject(
+      jvmtiEnv* env,
+      jobject object,
+      jvmtiObjectReferenceCallback object_reference_callback,
+      const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverReachableObjects(jvmtiEnv* env,
+                                                jvmtiHeapRootCallback heap_root_callback,
+                                                jvmtiStackReferenceCallback stack_ref_callback,
+                                                jvmtiObjectReferenceCallback object_ref_callback,
+                                                const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverHeap(jvmtiEnv* env,
+                                    jvmtiHeapObjectFilter object_filter,
+                                    jvmtiHeapObjectCallback heap_object_callback,
+                                    const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IterateOverInstancesOfClass(jvmtiEnv* env,
+                                                jclass klass,
+                                                jvmtiHeapObjectFilter object_filter,
+                                                jvmtiHeapObjectCallback heap_object_callback,
+                                                const void* user_data) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalObject(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jobject* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalInstance(jvmtiEnv* env,
+                                     jthread thread,
+                                     jint depth,
+                                     jobject* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalInt(jvmtiEnv* env,
+                                jthread thread,
+                                jint depth,
+                                jint slot,
+                                jint* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalLong(jvmtiEnv* env,
+                                 jthread thread,
+                                 jint depth,
+                                 jint slot,
+                                 jlong* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalFloat(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint depth,
+                                  jint slot,
+                                  jfloat* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalDouble(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jdouble* value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalObject(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jobject value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalInt(jvmtiEnv* env,
+                                jthread thread,
+                                jint depth,
+                                jint slot,
+                                jint value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalLong(jvmtiEnv* env,
+                                 jthread thread,
+                                 jint depth,
+                                 jint slot,
+                                 jlong value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalFloat(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint depth,
+                                  jint slot,
+                                  jfloat value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetLocalDouble(jvmtiEnv* env,
+                                   jthread thread,
+                                   jint depth,
+                                   jint slot,
+                                   jdouble value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassLoaderClasses(jvmtiEnv* env,
+                                          jobject initiating_loader,
+                                          jint* class_count_ptr,
+                                          jclass** classes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassSignature(jvmtiEnv* env,
+                                      jclass klass,
+                                      char** signature_ptr,
+                                      char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassStatus(jvmtiEnv* env, jclass klass, jint* status_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSourceFileName(jvmtiEnv* env, jclass klass, char** source_name_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassModifiers(jvmtiEnv* env, jclass klass, jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassMethods(jvmtiEnv* env,
+                                    jclass klass,
+                                    jint* method_count_ptr,
+                                    jmethodID** methods_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassFields(jvmtiEnv* env,
+                                   jclass klass,
+                                   jint* field_count_ptr,
+                                   jfieldID** fields_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetImplementedInterfaces(jvmtiEnv* env,
+                                             jclass klass,
+                                             jint* interface_count_ptr,
+                                             jclass** interfaces_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassVersionNumbers(jvmtiEnv* env,
+                                           jclass klass,
+                                           jint* minor_version_ptr,
+                                           jint* major_version_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetConstantPool(jvmtiEnv* env,
+                                    jclass klass,
+                                    jint* constant_pool_count_ptr,
+                                    jint* constant_pool_byte_count_ptr,
+                                    unsigned char** constant_pool_bytes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsInterface(jvmtiEnv* env, jclass klass, jboolean* is_interface_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsArrayClass(jvmtiEnv* env,
+                                 jclass klass,
+                                 jboolean* is_array_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsModifiableClass(jvmtiEnv* env,
+                                      jclass klass,
+                                      jboolean* is_modifiable_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetClassLoader(jvmtiEnv* env, jclass klass, jobject* classloader_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSourceDebugExtension(jvmtiEnv* env,
+                                            jclass klass,
+                                            char** source_debug_extension_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RetransformClasses(jvmtiEnv* env, jint class_count, const jclass* classes) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RedefineClasses(jvmtiEnv* env,
+                                    jint class_count,
+                                    const jvmtiClassDefinition* class_definitions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectSize(jvmtiEnv* env, jobject object, jlong* size_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectHashCode(jvmtiEnv* env, jobject object, jint* hash_code_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env,
+                                          jobject object,
+                                          jvmtiMonitorUsage* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldName(jvmtiEnv* env,
+                                 jclass klass,
+                                 jfieldID field,
+                                 char** name_ptr,
+                                 char** signature_ptr,
+                                 char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldDeclaringClass(jvmtiEnv* env,
+                                           jclass klass,
+                                           jfieldID field,
+                                           jclass* declaring_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetFieldModifiers(jvmtiEnv* env,
+                                      jclass klass,
+                                      jfieldID field,
+                                      jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsFieldSynthetic(jvmtiEnv* env,
+                                     jclass klass,
+                                     jfieldID field,
+                                     jboolean* is_synthetic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodName(jvmtiEnv* env,
+                                  jmethodID method,
+                                  char** name_ptr,
+                                  char** signature_ptr,
+                                  char** generic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodDeclaringClass(jvmtiEnv* env,
+                                            jmethodID method,
+                                            jclass* declaring_class_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodModifiers(jvmtiEnv* env,
+                                       jmethodID method,
+                                       jint* modifiers_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMaxLocals(jvmtiEnv* env,
+                                 jmethodID method,
+                                 jint* max_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetArgumentsSize(jvmtiEnv* env,
+                                     jmethodID method,
+                                     jint* size_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLineNumberTable(jvmtiEnv* env,
+                                       jmethodID method,
+                                       jint* entry_count_ptr,
+                                       jvmtiLineNumberEntry** table_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetMethodLocation(jvmtiEnv* env,
+                                      jmethodID method,
+                                      jlocation* start_location_ptr,
+                                      jlocation* end_location_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetLocalVariableTable(jvmtiEnv* env,
+                                          jmethodID method,
+                                          jint* entry_count_ptr,
+                                          jvmtiLocalVariableEntry** table_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetBytecodes(jvmtiEnv* env,
+                                 jmethodID method,
+                                 jint* bytecode_count_ptr,
+                                 unsigned char** bytecodes_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodNative(jvmtiEnv* env, jmethodID method, jboolean* is_native_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodSynthetic(jvmtiEnv* env, jmethodID method, jboolean* is_synthetic_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError IsMethodObsolete(jvmtiEnv* env, jmethodID method, jboolean* is_obsolete_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env, jint prefix_count, char** prefixes) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError CreateRawMonitor(jvmtiEnv* env, const char* name, jrawMonitorID* monitor_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError DestroyRawMonitor(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorEnter(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorExit(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorWait(jvmtiEnv* env, jrawMonitorID monitor, jlong millis) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorNotify(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RawMonitorNotifyAll(jvmtiEnv* env, jrawMonitorID monitor) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetJNIFunctionTable(jvmtiEnv* env, const jniNativeInterface* function_table) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetJNIFunctionTable(jvmtiEnv* env, jniNativeInterface** function_table) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetEventCallbacks(jvmtiEnv* env,
+                                      const jvmtiEventCallbacks* callbacks,
+                                      jint size_of_callbacks) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetEventNotificationMode(jvmtiEnv* env,
+                                             jvmtiEventMode mode,
+                                             jvmtiEvent event_type,
+                                             jthread event_thread,
+                                             ...) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GenerateEvents(jvmtiEnv* env, jvmtiEvent event_type) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetExtensionFunctions(jvmtiEnv* env,
+                                          jint* extension_count_ptr,
+                                          jvmtiExtensionFunctionInfo** extensions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetExtensionEvents(jvmtiEnv* env,
+                                       jint* extension_count_ptr,
+                                       jvmtiExtensionEventInfo** extensions) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetExtensionEventCallback(jvmtiEnv* env,
+                                              jint extension_event_index,
+                                              jvmtiExtensionEvent callback) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetPotentialCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddCapabilities(jvmtiEnv* env, const jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError RelinquishCapabilities(jvmtiEnv* env,
+                                           const jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetThreadCpuTime(jvmtiEnv* env, jthread thread, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetTime(jvmtiEnv* env, jlong* nanos_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetAvailableProcessors(jvmtiEnv* env, jint* processor_count_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddToBootstrapClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError AddToSystemClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSystemProperties(jvmtiEnv* env, jint* count_ptr, char*** property_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetSystemProperty(jvmtiEnv* env, const char* property, char** value_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError SetSystemProperty(jvmtiEnv* env, const char* property, const char* value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    delete env;
+    return OK;
+  }
+
+  static jvmtiError SetEnvironmentLocalStorage(jvmtiEnv* env, const void* data) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    reinterpret_cast<ArtJvmTiEnv*>(env)->local_data = const_cast<void*>(data);
+    return OK;
+  }
+
+  static jvmtiError GetEnvironmentLocalStorage(jvmtiEnv* env, void** data_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    *data_ptr = reinterpret_cast<ArtJvmTiEnv*>(env)->local_data;
+    return OK;
+  }
+
+  static jvmtiError GetVersionNumber(jvmtiEnv* env, jint* version_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    *version_ptr = JVMTI_VERSION;
+    return OK;
+  }
+
+  static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error, char** name_ptr) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    if (name_ptr == nullptr) {
+      return ERR(NULL_POINTER);
+    }
+    switch (error) {
+#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
+          *name_ptr = const_cast<char*>("JVMTI_ERROR_"#e); \
+          return OK; \
+        } while (false)
+      ERROR_CASE(NONE);
+      ERROR_CASE(INVALID_THREAD);
+      ERROR_CASE(INVALID_THREAD_GROUP);
+      ERROR_CASE(INVALID_PRIORITY);
+      ERROR_CASE(THREAD_NOT_SUSPENDED);
+      ERROR_CASE(THREAD_NOT_ALIVE);
+      ERROR_CASE(INVALID_OBJECT);
+      ERROR_CASE(INVALID_CLASS);
+      ERROR_CASE(CLASS_NOT_PREPARED);
+      ERROR_CASE(INVALID_METHODID);
+      ERROR_CASE(INVALID_LOCATION);
+      ERROR_CASE(INVALID_FIELDID);
+      ERROR_CASE(NO_MORE_FRAMES);
+      ERROR_CASE(OPAQUE_FRAME);
+      ERROR_CASE(TYPE_MISMATCH);
+      ERROR_CASE(INVALID_SLOT);
+      ERROR_CASE(DUPLICATE);
+      ERROR_CASE(NOT_FOUND);
+      ERROR_CASE(INVALID_MONITOR);
+      ERROR_CASE(NOT_MONITOR_OWNER);
+      ERROR_CASE(INTERRUPT);
+      ERROR_CASE(INVALID_CLASS_FORMAT);
+      ERROR_CASE(CIRCULAR_CLASS_DEFINITION);
+      ERROR_CASE(FAILS_VERIFICATION);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_ADDED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED);
+      ERROR_CASE(INVALID_TYPESTATE);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_DELETED);
+      ERROR_CASE(UNSUPPORTED_VERSION);
+      ERROR_CASE(NAMES_DONT_MATCH);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED);
+      ERROR_CASE(UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED);
+      ERROR_CASE(UNMODIFIABLE_CLASS);
+      ERROR_CASE(NOT_AVAILABLE);
+      ERROR_CASE(MUST_POSSESS_CAPABILITY);
+      ERROR_CASE(NULL_POINTER);
+      ERROR_CASE(ABSENT_INFORMATION);
+      ERROR_CASE(INVALID_EVENT_TYPE);
+      ERROR_CASE(ILLEGAL_ARGUMENT);
+      ERROR_CASE(NATIVE_METHOD);
+      ERROR_CASE(CLASS_LOADER_UNSUPPORTED);
+      ERROR_CASE(OUT_OF_MEMORY);
+      ERROR_CASE(ACCESS_DENIED);
+      ERROR_CASE(WRONG_PHASE);
+      ERROR_CASE(INTERNAL);
+      ERROR_CASE(UNATTACHED_THREAD);
+      ERROR_CASE(INVALID_ENVIRONMENT);
+#undef ERROR_CASE
+      default: {
+        *name_ptr = const_cast<char*>("JVMTI_ERROR_UNKNOWN");
+        return ERR(ILLEGAL_ARGUMENT);
+      }
+    }
+  }
+
+  static jvmtiError SetVerboseFlag(jvmtiEnv* env, jvmtiVerboseFlag flag, jboolean value) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  static jvmtiError GetJLocationFormat(jvmtiEnv* env, jvmtiJlocationFormat* format_ptr) {
+    return ERR(NOT_IMPLEMENTED);
+  }
+};
+
+static bool IsJvmtiVersion(jint version) {
+  return version == JVMTI_VERSION_1 ||
+         version == JVMTI_VERSION_1_0 ||
+         version == JVMTI_VERSION_1_1 ||
+         version == JVMTI_VERSION_1_2 ||
+         version == JVMTI_VERSION;
+}
+
+// Creates an ArtJvmTiEnv for the given JavaVM and stores a pointer to it in *new_jvmtiEnv,
+// which the caller then hands back to the agent as its jvmtiEnv.
+static void CreateArtJvmTiEnv(art::JavaVMExt* vm, /*out*/void** new_jvmtiEnv) {
+  struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm);
+  *new_jvmtiEnv = env;
+}
+
+// A hook that the runtime uses to allow plugins to handle GetEnv calls. It returns JNI_OK and
+// places the new environment in 'env' if this library can handle the GetEnv request. Otherwise
+// it returns JNI_EVERSION and does not modify the 'env' pointer.
+static jint GetEnvHandler(art::JavaVMExt* vm, /*out*/void** env, jint version) {
+  if (IsJvmtiVersion(version)) {
+    CreateArtJvmTiEnv(vm, env);
+    return JNI_OK;
+  } else {
+    printf("version 0x%x is not valid!", version);
+    return JNI_EVERSION;
+  }
+}
+
+// The plugin initialization function. This registers the hook that creates jvmti environments.
+extern "C" bool ArtPlugin_Initialize() {
+  art::Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  return true;
+}
+
+// The actual struct holding all of the entrypoints into the jvmti interface.
+const jvmtiInterface_1 gJvmtiInterface = {
+  nullptr,  // reserved1
+  JvmtiFunctions::SetEventNotificationMode,
+  nullptr,  // reserved3
+  JvmtiFunctions::GetAllThreads,
+  JvmtiFunctions::SuspendThread,
+  JvmtiFunctions::ResumeThread,
+  JvmtiFunctions::StopThread,
+  JvmtiFunctions::InterruptThread,
+  JvmtiFunctions::GetThreadInfo,
+  JvmtiFunctions::GetOwnedMonitorInfo,  // 10
+  JvmtiFunctions::GetCurrentContendedMonitor,
+  JvmtiFunctions::RunAgentThread,
+  JvmtiFunctions::GetTopThreadGroups,
+  JvmtiFunctions::GetThreadGroupInfo,
+  JvmtiFunctions::GetThreadGroupChildren,
+  JvmtiFunctions::GetFrameCount,
+  JvmtiFunctions::GetThreadState,
+  JvmtiFunctions::GetCurrentThread,
+  JvmtiFunctions::GetFrameLocation,
+  JvmtiFunctions::NotifyFramePop,  // 20
+  JvmtiFunctions::GetLocalObject,
+  JvmtiFunctions::GetLocalInt,
+  JvmtiFunctions::GetLocalLong,
+  JvmtiFunctions::GetLocalFloat,
+  JvmtiFunctions::GetLocalDouble,
+  JvmtiFunctions::SetLocalObject,
+  JvmtiFunctions::SetLocalInt,
+  JvmtiFunctions::SetLocalLong,
+  JvmtiFunctions::SetLocalFloat,
+  JvmtiFunctions::SetLocalDouble,  // 30
+  JvmtiFunctions::CreateRawMonitor,
+  JvmtiFunctions::DestroyRawMonitor,
+  JvmtiFunctions::RawMonitorEnter,
+  JvmtiFunctions::RawMonitorExit,
+  JvmtiFunctions::RawMonitorWait,
+  JvmtiFunctions::RawMonitorNotify,
+  JvmtiFunctions::RawMonitorNotifyAll,
+  JvmtiFunctions::SetBreakpoint,
+  JvmtiFunctions::ClearBreakpoint,
+  nullptr,  // reserved40
+  JvmtiFunctions::SetFieldAccessWatch,
+  JvmtiFunctions::ClearFieldAccessWatch,
+  JvmtiFunctions::SetFieldModificationWatch,
+  JvmtiFunctions::ClearFieldModificationWatch,
+  JvmtiFunctions::IsModifiableClass,
+  JvmtiFunctions::Allocate,
+  JvmtiFunctions::Deallocate,
+  JvmtiFunctions::GetClassSignature,
+  JvmtiFunctions::GetClassStatus,
+  JvmtiFunctions::GetSourceFileName,  // 50
+  JvmtiFunctions::GetClassModifiers,
+  JvmtiFunctions::GetClassMethods,
+  JvmtiFunctions::GetClassFields,
+  JvmtiFunctions::GetImplementedInterfaces,
+  JvmtiFunctions::IsInterface,
+  JvmtiFunctions::IsArrayClass,
+  JvmtiFunctions::GetClassLoader,
+  JvmtiFunctions::GetObjectHashCode,
+  JvmtiFunctions::GetObjectMonitorUsage,
+  JvmtiFunctions::GetFieldName,  // 60
+  JvmtiFunctions::GetFieldDeclaringClass,
+  JvmtiFunctions::GetFieldModifiers,
+  JvmtiFunctions::IsFieldSynthetic,
+  JvmtiFunctions::GetMethodName,
+  JvmtiFunctions::GetMethodDeclaringClass,
+  JvmtiFunctions::GetMethodModifiers,
+  nullptr,  // reserved67
+  JvmtiFunctions::GetMaxLocals,
+  JvmtiFunctions::GetArgumentsSize,
+  JvmtiFunctions::GetLineNumberTable,  // 70
+  JvmtiFunctions::GetMethodLocation,
+  JvmtiFunctions::GetLocalVariableTable,
+  JvmtiFunctions::SetNativeMethodPrefix,
+  JvmtiFunctions::SetNativeMethodPrefixes,
+  JvmtiFunctions::GetBytecodes,
+  JvmtiFunctions::IsMethodNative,
+  JvmtiFunctions::IsMethodSynthetic,
+  JvmtiFunctions::GetLoadedClasses,
+  JvmtiFunctions::GetClassLoaderClasses,
+  JvmtiFunctions::PopFrame,  // 80
+  JvmtiFunctions::ForceEarlyReturnObject,
+  JvmtiFunctions::ForceEarlyReturnInt,
+  JvmtiFunctions::ForceEarlyReturnLong,
+  JvmtiFunctions::ForceEarlyReturnFloat,
+  JvmtiFunctions::ForceEarlyReturnDouble,
+  JvmtiFunctions::ForceEarlyReturnVoid,
+  JvmtiFunctions::RedefineClasses,
+  JvmtiFunctions::GetVersionNumber,
+  JvmtiFunctions::GetCapabilities,
+  JvmtiFunctions::GetSourceDebugExtension,  // 90
+  JvmtiFunctions::IsMethodObsolete,
+  JvmtiFunctions::SuspendThreadList,
+  JvmtiFunctions::ResumeThreadList,
+  nullptr,  // reserved94
+  nullptr,  // reserved95
+  nullptr,  // reserved96
+  nullptr,  // reserved97
+  nullptr,  // reserved98
+  nullptr,  // reserved99
+  JvmtiFunctions::GetAllStackTraces,  // 100
+  JvmtiFunctions::GetThreadListStackTraces,
+  JvmtiFunctions::GetThreadLocalStorage,
+  JvmtiFunctions::SetThreadLocalStorage,
+  JvmtiFunctions::GetStackTrace,
+  nullptr,  // reserved105
+  JvmtiFunctions::GetTag,
+  JvmtiFunctions::SetTag,
+  JvmtiFunctions::ForceGarbageCollection,
+  JvmtiFunctions::IterateOverObjectsReachableFromObject,
+  JvmtiFunctions::IterateOverReachableObjects,  // 110
+  JvmtiFunctions::IterateOverHeap,
+  JvmtiFunctions::IterateOverInstancesOfClass,
+  nullptr,  // reserved113
+  JvmtiFunctions::GetObjectsWithTags,
+  JvmtiFunctions::FollowReferences,
+  JvmtiFunctions::IterateThroughHeap,
+  nullptr,  // reserved117
+  nullptr,  // reserved118
+  nullptr,  // reserved119
+  JvmtiFunctions::SetJNIFunctionTable,  // 120
+  JvmtiFunctions::GetJNIFunctionTable,
+  JvmtiFunctions::SetEventCallbacks,
+  JvmtiFunctions::GenerateEvents,
+  JvmtiFunctions::GetExtensionFunctions,
+  JvmtiFunctions::GetExtensionEvents,
+  JvmtiFunctions::SetExtensionEventCallback,
+  JvmtiFunctions::DisposeEnvironment,
+  JvmtiFunctions::GetErrorName,
+  JvmtiFunctions::GetJLocationFormat,
+  JvmtiFunctions::GetSystemProperties,  // 130
+  JvmtiFunctions::GetSystemProperty,
+  JvmtiFunctions::SetSystemProperty,
+  JvmtiFunctions::GetPhase,
+  JvmtiFunctions::GetCurrentThreadCpuTimerInfo,
+  JvmtiFunctions::GetCurrentThreadCpuTime,
+  JvmtiFunctions::GetThreadCpuTimerInfo,
+  JvmtiFunctions::GetThreadCpuTime,
+  JvmtiFunctions::GetTimerInfo,
+  JvmtiFunctions::GetTime,
+  JvmtiFunctions::GetPotentialCapabilities,  // 140
+  nullptr,  // reserved141
+  JvmtiFunctions::AddCapabilities,
+  JvmtiFunctions::RelinquishCapabilities,
+  JvmtiFunctions::GetAvailableProcessors,
+  JvmtiFunctions::GetClassVersionNumbers,
+  JvmtiFunctions::GetConstantPool,
+  JvmtiFunctions::GetEnvironmentLocalStorage,
+  JvmtiFunctions::SetEnvironmentLocalStorage,
+  JvmtiFunctions::AddToBootstrapClassLoaderSearch,
+  JvmtiFunctions::SetVerboseFlag,  // 150
+  JvmtiFunctions::AddToSystemClassLoaderSearch,
+  JvmtiFunctions::RetransformClasses,
+  JvmtiFunctions::GetOwnedMonitorStackDepthInfo,
+  JvmtiFunctions::GetObjectSize,
+  JvmtiFunctions::GetLocalInstance,
+};
+
+}  // namespace openjdkjvmti
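
For review purposes, here is a minimal sketch (illustrative only, not part of this
change) of how an agent that already holds a jvmtiEnv could exercise the handful of
calls implemented above -- GetVersionNumber, Set/GetEnvironmentLocalStorage,
GetErrorName and DisposeEnvironment; the helper name DemoImplementedCalls is made up
for the example:

    #include "jvmti.h"

    // Exercises the environment-management calls that are implemented; most other
    // entries in the table still return JVMTI_ERROR_NOT_IMPLEMENTED at this stage.
    static void DemoImplementedCalls(jvmtiEnv* jvmti) {
      jint version = 0;
      jvmti->GetVersionNumber(&version);           // Fills in JVMTI_VERSION (1.2.1).

      static int agent_state = 0;
      jvmti->SetEnvironmentLocalStorage(&agent_state);
      void* stored = nullptr;
      jvmti->GetEnvironmentLocalStorage(&stored);  // stored now points at agent_state.

      char* name = nullptr;
      // This implementation hands back a static string, so no Deallocate is needed.
      jvmti->GetErrorName(JVMTI_ERROR_NOT_AVAILABLE, &name);

      jvmti->DisposeEnvironment();                 // Deletes the ArtJvmTiEnv.
    }
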
diff --git a/runtime/openjdkjvmti/README.md b/runtime/openjdkjvmti/README.md
new file mode 100644
index 0000000..b8bab57
--- /dev/null
+++ b/runtime/openjdkjvmti/README.md
@@ -0,0 +1,7 @@
+openjdkjvmti plugin
+====
+
+This is a partial implementation of the JVMTI v1.2 interface for the Android
+runtime (ART), packaged as a runtime plugin. It allows the use of agents that
+can modify the running state of the program by modifying dex files in memory
+and performing other operations on the global runtime state.
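
As a rough sketch of the agent model the README describes (again illustrative only
and not part of this change), an agent would obtain its environment through
JavaVM::GetEnv, which the runtime forwards to the GetEnvHandler hook registered by
ArtPlugin_Initialize above:

    #include "jvmti.h"

    // Standard JVMTI agent entry point, declared in the jvmti.h added below.
    JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
      jvmtiEnv* jvmti = nullptr;
      // JVMTI_VERSION_1_2 passes IsJvmtiVersion(), so GetEnvHandler creates an
      // ArtJvmTiEnv and returns it here.
      if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2) != JNI_OK) {
        return JNI_ERR;
      }
      // The environment is usable, but most entry points are still stubs.
      return JNI_OK;
    }
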
diff --git a/runtime/openjdkjvmti/jvmti.h b/runtime/openjdkjvmti/jvmti.h
new file mode 100644
index 0000000..ee708cb
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti.h
@@ -0,0 +1,2534 @@
+/*
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+    /* AUTOMATICALLY GENERATED FILE - DO NOT EDIT */
+
+
+    /* Include file for the Java(tm) Virtual Machine Tool Interface */
+
+#ifndef _JAVA_JVMTI_H_
+#define _JAVA_JVMTI_H_
+
+#include "jni.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+    JVMTI_VERSION_1   = 0x30010000,
+    JVMTI_VERSION_1_0 = 0x30010000,
+    JVMTI_VERSION_1_1 = 0x30010100,
+    JVMTI_VERSION_1_2 = 0x30010200,
+
+    JVMTI_VERSION = 0x30000000 + (1 * 0x10000) + (2 * 0x100) + 1  /* version: 1.2.1 */
+};
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *vm, char *options, void *reserved);
+
+JNIEXPORT jint JNICALL
+Agent_OnAttach(JavaVM* vm, char* options, void* reserved);
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *vm);
+
+    /* Forward declaration of the environment */
+
+struct _jvmtiEnv;
+
+struct jvmtiInterface_1_;
+
+#ifdef __cplusplus
+typedef _jvmtiEnv jvmtiEnv;
+#else
+typedef const struct jvmtiInterface_1_ *jvmtiEnv;
+#endif /* __cplusplus */
+
+/* Derived Base Types */
+
+typedef jobject jthread;
+typedef jobject jthreadGroup;
+typedef jlong jlocation;
+struct _jrawMonitorID;
+typedef struct _jrawMonitorID *jrawMonitorID;
+typedef struct JNINativeInterface_ jniNativeInterface;
+
+    /* Constants */
+
+
+    /* Thread State Flags */
+
+enum {
+    JVMTI_THREAD_STATE_ALIVE = 0x0001,
+    JVMTI_THREAD_STATE_TERMINATED = 0x0002,
+    JVMTI_THREAD_STATE_RUNNABLE = 0x0004,
+    JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER = 0x0400,
+    JVMTI_THREAD_STATE_WAITING = 0x0080,
+    JVMTI_THREAD_STATE_WAITING_INDEFINITELY = 0x0010,
+    JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT = 0x0020,
+    JVMTI_THREAD_STATE_SLEEPING = 0x0040,
+    JVMTI_THREAD_STATE_IN_OBJECT_WAIT = 0x0100,
+    JVMTI_THREAD_STATE_PARKED = 0x0200,
+    JVMTI_THREAD_STATE_SUSPENDED = 0x100000,
+    JVMTI_THREAD_STATE_INTERRUPTED = 0x200000,
+    JVMTI_THREAD_STATE_IN_NATIVE = 0x400000,
+    JVMTI_THREAD_STATE_VENDOR_1 = 0x10000000,
+    JVMTI_THREAD_STATE_VENDOR_2 = 0x20000000,
+    JVMTI_THREAD_STATE_VENDOR_3 = 0x40000000
+};
+
+    /* java.lang.Thread.State Conversion Masks */
+
+enum {
+    JVMTI_JAVA_LANG_THREAD_STATE_MASK = JVMTI_THREAD_STATE_TERMINATED | JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_RUNNABLE | JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_INDEFINITELY | JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT,
+    JVMTI_JAVA_LANG_THREAD_STATE_NEW = 0,
+    JVMTI_JAVA_LANG_THREAD_STATE_TERMINATED = JVMTI_THREAD_STATE_TERMINATED,
+    JVMTI_JAVA_LANG_THREAD_STATE_RUNNABLE = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_RUNNABLE,
+    JVMTI_JAVA_LANG_THREAD_STATE_BLOCKED = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER,
+    JVMTI_JAVA_LANG_THREAD_STATE_WAITING = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_INDEFINITELY,
+    JVMTI_JAVA_LANG_THREAD_STATE_TIMED_WAITING = JVMTI_THREAD_STATE_ALIVE | JVMTI_THREAD_STATE_WAITING | JVMTI_THREAD_STATE_WAITING_WITH_TIMEOUT
+};
+
+    /* Thread Priority Constants */
+
+enum {
+    JVMTI_THREAD_MIN_PRIORITY = 1,
+    JVMTI_THREAD_NORM_PRIORITY = 5,
+    JVMTI_THREAD_MAX_PRIORITY = 10
+};
+
+    /* Heap Filter Flags */
+
+enum {
+    JVMTI_HEAP_FILTER_TAGGED = 0x4,
+    JVMTI_HEAP_FILTER_UNTAGGED = 0x8,
+    JVMTI_HEAP_FILTER_CLASS_TAGGED = 0x10,
+    JVMTI_HEAP_FILTER_CLASS_UNTAGGED = 0x20
+};
+
+    /* Heap Visit Control Flags */
+
+enum {
+    JVMTI_VISIT_OBJECTS = 0x100,
+    JVMTI_VISIT_ABORT = 0x8000
+};
+
+    /* Heap Reference Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_REFERENCE_CLASS = 1,
+    JVMTI_HEAP_REFERENCE_FIELD = 2,
+    JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT = 3,
+    JVMTI_HEAP_REFERENCE_CLASS_LOADER = 4,
+    JVMTI_HEAP_REFERENCE_SIGNERS = 5,
+    JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN = 6,
+    JVMTI_HEAP_REFERENCE_INTERFACE = 7,
+    JVMTI_HEAP_REFERENCE_STATIC_FIELD = 8,
+    JVMTI_HEAP_REFERENCE_CONSTANT_POOL = 9,
+    JVMTI_HEAP_REFERENCE_SUPERCLASS = 10,
+    JVMTI_HEAP_REFERENCE_JNI_GLOBAL = 21,
+    JVMTI_HEAP_REFERENCE_SYSTEM_CLASS = 22,
+    JVMTI_HEAP_REFERENCE_MONITOR = 23,
+    JVMTI_HEAP_REFERENCE_STACK_LOCAL = 24,
+    JVMTI_HEAP_REFERENCE_JNI_LOCAL = 25,
+    JVMTI_HEAP_REFERENCE_THREAD = 26,
+    JVMTI_HEAP_REFERENCE_OTHER = 27
+} jvmtiHeapReferenceKind;
+
+    /* Primitive Type Enumeration */
+
+typedef enum {
+    JVMTI_PRIMITIVE_TYPE_BOOLEAN = 90,
+    JVMTI_PRIMITIVE_TYPE_BYTE = 66,
+    JVMTI_PRIMITIVE_TYPE_CHAR = 67,
+    JVMTI_PRIMITIVE_TYPE_SHORT = 83,
+    JVMTI_PRIMITIVE_TYPE_INT = 73,
+    JVMTI_PRIMITIVE_TYPE_LONG = 74,
+    JVMTI_PRIMITIVE_TYPE_FLOAT = 70,
+    JVMTI_PRIMITIVE_TYPE_DOUBLE = 68
+} jvmtiPrimitiveType;
+
+    /* Heap Object Filter Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_OBJECT_TAGGED = 1,
+    JVMTI_HEAP_OBJECT_UNTAGGED = 2,
+    JVMTI_HEAP_OBJECT_EITHER = 3
+} jvmtiHeapObjectFilter;
+
+    /* Heap Root Kind Enumeration */
+
+typedef enum {
+    JVMTI_HEAP_ROOT_JNI_GLOBAL = 1,
+    JVMTI_HEAP_ROOT_SYSTEM_CLASS = 2,
+    JVMTI_HEAP_ROOT_MONITOR = 3,
+    JVMTI_HEAP_ROOT_STACK_LOCAL = 4,
+    JVMTI_HEAP_ROOT_JNI_LOCAL = 5,
+    JVMTI_HEAP_ROOT_THREAD = 6,
+    JVMTI_HEAP_ROOT_OTHER = 7
+} jvmtiHeapRootKind;
+
+    /* Object Reference Enumeration */
+
+typedef enum {
+    JVMTI_REFERENCE_CLASS = 1,
+    JVMTI_REFERENCE_FIELD = 2,
+    JVMTI_REFERENCE_ARRAY_ELEMENT = 3,
+    JVMTI_REFERENCE_CLASS_LOADER = 4,
+    JVMTI_REFERENCE_SIGNERS = 5,
+    JVMTI_REFERENCE_PROTECTION_DOMAIN = 6,
+    JVMTI_REFERENCE_INTERFACE = 7,
+    JVMTI_REFERENCE_STATIC_FIELD = 8,
+    JVMTI_REFERENCE_CONSTANT_POOL = 9
+} jvmtiObjectReferenceKind;
+
+    /* Iteration Control Enumeration */
+
+typedef enum {
+    JVMTI_ITERATION_CONTINUE = 1,
+    JVMTI_ITERATION_IGNORE = 2,
+    JVMTI_ITERATION_ABORT = 0
+} jvmtiIterationControl;
+
+    /* Class Status Flags */
+
+enum {
+    JVMTI_CLASS_STATUS_VERIFIED = 1,
+    JVMTI_CLASS_STATUS_PREPARED = 2,
+    JVMTI_CLASS_STATUS_INITIALIZED = 4,
+    JVMTI_CLASS_STATUS_ERROR = 8,
+    JVMTI_CLASS_STATUS_ARRAY = 16,
+    JVMTI_CLASS_STATUS_PRIMITIVE = 32
+};
+
+    /* Event Enable/Disable */
+
+typedef enum {
+    JVMTI_ENABLE = 1,
+    JVMTI_DISABLE = 0
+} jvmtiEventMode;
+
+    /* Extension Function/Event Parameter Types */
+
+typedef enum {
+    JVMTI_TYPE_JBYTE = 101,
+    JVMTI_TYPE_JCHAR = 102,
+    JVMTI_TYPE_JSHORT = 103,
+    JVMTI_TYPE_JINT = 104,
+    JVMTI_TYPE_JLONG = 105,
+    JVMTI_TYPE_JFLOAT = 106,
+    JVMTI_TYPE_JDOUBLE = 107,
+    JVMTI_TYPE_JBOOLEAN = 108,
+    JVMTI_TYPE_JOBJECT = 109,
+    JVMTI_TYPE_JTHREAD = 110,
+    JVMTI_TYPE_JCLASS = 111,
+    JVMTI_TYPE_JVALUE = 112,
+    JVMTI_TYPE_JFIELDID = 113,
+    JVMTI_TYPE_JMETHODID = 114,
+    JVMTI_TYPE_CCHAR = 115,
+    JVMTI_TYPE_CVOID = 116,
+    JVMTI_TYPE_JNIENV = 117
+} jvmtiParamTypes;
+
+    /* Extension Function/Event Parameter Kinds */
+
+typedef enum {
+    JVMTI_KIND_IN = 91,
+    JVMTI_KIND_IN_PTR = 92,
+    JVMTI_KIND_IN_BUF = 93,
+    JVMTI_KIND_ALLOC_BUF = 94,
+    JVMTI_KIND_ALLOC_ALLOC_BUF = 95,
+    JVMTI_KIND_OUT = 96,
+    JVMTI_KIND_OUT_BUF = 97
+} jvmtiParamKind;
+
+    /* Timer Kinds */
+
+typedef enum {
+    JVMTI_TIMER_USER_CPU = 30,
+    JVMTI_TIMER_TOTAL_CPU = 31,
+    JVMTI_TIMER_ELAPSED = 32
+} jvmtiTimerKind;
+
+    /* Phases of execution */
+
+typedef enum {
+    JVMTI_PHASE_ONLOAD = 1,
+    JVMTI_PHASE_PRIMORDIAL = 2,
+    JVMTI_PHASE_START = 6,
+    JVMTI_PHASE_LIVE = 4,
+    JVMTI_PHASE_DEAD = 8
+} jvmtiPhase;
+
+    /* Version Interface Types */
+
+enum {
+    JVMTI_VERSION_INTERFACE_JNI = 0x00000000,
+    JVMTI_VERSION_INTERFACE_JVMTI = 0x30000000
+};
+
+    /* Version Masks */
+
+enum {
+    JVMTI_VERSION_MASK_INTERFACE_TYPE = 0x70000000,
+    JVMTI_VERSION_MASK_MAJOR = 0x0FFF0000,
+    JVMTI_VERSION_MASK_MINOR = 0x0000FF00,
+    JVMTI_VERSION_MASK_MICRO = 0x000000FF
+};
+
+    /* Version Shifts */
+
+enum {
+    JVMTI_VERSION_SHIFT_MAJOR = 16,
+    JVMTI_VERSION_SHIFT_MINOR = 8,
+    JVMTI_VERSION_SHIFT_MICRO = 0
+};
+
+    /* Verbose Flag Enumeration */
+
+typedef enum {
+    JVMTI_VERBOSE_OTHER = 0,
+    JVMTI_VERBOSE_GC = 1,
+    JVMTI_VERBOSE_CLASS = 2,
+    JVMTI_VERBOSE_JNI = 4
+} jvmtiVerboseFlag;
+
+    /* JLocation Format Enumeration */
+
+typedef enum {
+    JVMTI_JLOCATION_JVMBCI = 1,
+    JVMTI_JLOCATION_MACHINEPC = 2,
+    JVMTI_JLOCATION_OTHER = 0
+} jvmtiJlocationFormat;
+
+    /* Resource Exhaustion Flags */
+
+enum {
+    JVMTI_RESOURCE_EXHAUSTED_OOM_ERROR = 0x0001,
+    JVMTI_RESOURCE_EXHAUSTED_JAVA_HEAP = 0x0002,
+    JVMTI_RESOURCE_EXHAUSTED_THREADS = 0x0004
+};
+
+    /* Errors */
+
+typedef enum {
+    JVMTI_ERROR_NONE = 0,
+    JVMTI_ERROR_INVALID_THREAD = 10,
+    JVMTI_ERROR_INVALID_THREAD_GROUP = 11,
+    JVMTI_ERROR_INVALID_PRIORITY = 12,
+    JVMTI_ERROR_THREAD_NOT_SUSPENDED = 13,
+    JVMTI_ERROR_THREAD_SUSPENDED = 14,
+    JVMTI_ERROR_THREAD_NOT_ALIVE = 15,
+    JVMTI_ERROR_INVALID_OBJECT = 20,
+    JVMTI_ERROR_INVALID_CLASS = 21,
+    JVMTI_ERROR_CLASS_NOT_PREPARED = 22,
+    JVMTI_ERROR_INVALID_METHODID = 23,
+    JVMTI_ERROR_INVALID_LOCATION = 24,
+    JVMTI_ERROR_INVALID_FIELDID = 25,
+    JVMTI_ERROR_NO_MORE_FRAMES = 31,
+    JVMTI_ERROR_OPAQUE_FRAME = 32,
+    JVMTI_ERROR_TYPE_MISMATCH = 34,
+    JVMTI_ERROR_INVALID_SLOT = 35,
+    JVMTI_ERROR_DUPLICATE = 40,
+    JVMTI_ERROR_NOT_FOUND = 41,
+    JVMTI_ERROR_INVALID_MONITOR = 50,
+    JVMTI_ERROR_NOT_MONITOR_OWNER = 51,
+    JVMTI_ERROR_INTERRUPT = 52,
+    JVMTI_ERROR_INVALID_CLASS_FORMAT = 60,
+    JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION = 61,
+    JVMTI_ERROR_FAILS_VERIFICATION = 62,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED = 63,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED = 64,
+    JVMTI_ERROR_INVALID_TYPESTATE = 65,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED = 66,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED = 67,
+    JVMTI_ERROR_UNSUPPORTED_VERSION = 68,
+    JVMTI_ERROR_NAMES_DONT_MATCH = 69,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED = 70,
+    JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED = 71,
+    JVMTI_ERROR_UNMODIFIABLE_CLASS = 79,
+    JVMTI_ERROR_NOT_AVAILABLE = 98,
+    JVMTI_ERROR_MUST_POSSESS_CAPABILITY = 99,
+    JVMTI_ERROR_NULL_POINTER = 100,
+    JVMTI_ERROR_ABSENT_INFORMATION = 101,
+    JVMTI_ERROR_INVALID_EVENT_TYPE = 102,
+    JVMTI_ERROR_ILLEGAL_ARGUMENT = 103,
+    JVMTI_ERROR_NATIVE_METHOD = 104,
+    JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED = 106,
+    JVMTI_ERROR_OUT_OF_MEMORY = 110,
+    JVMTI_ERROR_ACCESS_DENIED = 111,
+    JVMTI_ERROR_WRONG_PHASE = 112,
+    JVMTI_ERROR_INTERNAL = 113,
+    JVMTI_ERROR_UNATTACHED_THREAD = 115,
+    JVMTI_ERROR_INVALID_ENVIRONMENT = 116,
+    JVMTI_ERROR_MAX = 116
+} jvmtiError;
+
+    /* Event IDs */
+
+typedef enum {
+    JVMTI_MIN_EVENT_TYPE_VAL = 50,
+    JVMTI_EVENT_VM_INIT = 50,
+    JVMTI_EVENT_VM_DEATH = 51,
+    JVMTI_EVENT_THREAD_START = 52,
+    JVMTI_EVENT_THREAD_END = 53,
+    JVMTI_EVENT_CLASS_FILE_LOAD_HOOK = 54,
+    JVMTI_EVENT_CLASS_LOAD = 55,
+    JVMTI_EVENT_CLASS_PREPARE = 56,
+    JVMTI_EVENT_VM_START = 57,
+    JVMTI_EVENT_EXCEPTION = 58,
+    JVMTI_EVENT_EXCEPTION_CATCH = 59,
+    JVMTI_EVENT_SINGLE_STEP = 60,
+    JVMTI_EVENT_FRAME_POP = 61,
+    JVMTI_EVENT_BREAKPOINT = 62,
+    JVMTI_EVENT_FIELD_ACCESS = 63,
+    JVMTI_EVENT_FIELD_MODIFICATION = 64,
+    JVMTI_EVENT_METHOD_ENTRY = 65,
+    JVMTI_EVENT_METHOD_EXIT = 66,
+    JVMTI_EVENT_NATIVE_METHOD_BIND = 67,
+    JVMTI_EVENT_COMPILED_METHOD_LOAD = 68,
+    JVMTI_EVENT_COMPILED_METHOD_UNLOAD = 69,
+    JVMTI_EVENT_DYNAMIC_CODE_GENERATED = 70,
+    JVMTI_EVENT_DATA_DUMP_REQUEST = 71,
+    JVMTI_EVENT_MONITOR_WAIT = 73,
+    JVMTI_EVENT_MONITOR_WAITED = 74,
+    JVMTI_EVENT_MONITOR_CONTENDED_ENTER = 75,
+    JVMTI_EVENT_MONITOR_CONTENDED_ENTERED = 76,
+    JVMTI_EVENT_RESOURCE_EXHAUSTED = 80,
+    JVMTI_EVENT_GARBAGE_COLLECTION_START = 81,
+    JVMTI_EVENT_GARBAGE_COLLECTION_FINISH = 82,
+    JVMTI_EVENT_OBJECT_FREE = 83,
+    JVMTI_EVENT_VM_OBJECT_ALLOC = 84,
+    JVMTI_MAX_EVENT_TYPE_VAL = 84
+} jvmtiEvent;
+
+
+    /* Pre-Declarations */
+struct _jvmtiThreadInfo;
+typedef struct _jvmtiThreadInfo jvmtiThreadInfo;
+struct _jvmtiMonitorStackDepthInfo;
+typedef struct _jvmtiMonitorStackDepthInfo jvmtiMonitorStackDepthInfo;
+struct _jvmtiThreadGroupInfo;
+typedef struct _jvmtiThreadGroupInfo jvmtiThreadGroupInfo;
+struct _jvmtiFrameInfo;
+typedef struct _jvmtiFrameInfo jvmtiFrameInfo;
+struct _jvmtiStackInfo;
+typedef struct _jvmtiStackInfo jvmtiStackInfo;
+struct _jvmtiHeapReferenceInfoField;
+typedef struct _jvmtiHeapReferenceInfoField jvmtiHeapReferenceInfoField;
+struct _jvmtiHeapReferenceInfoArray;
+typedef struct _jvmtiHeapReferenceInfoArray jvmtiHeapReferenceInfoArray;
+struct _jvmtiHeapReferenceInfoConstantPool;
+typedef struct _jvmtiHeapReferenceInfoConstantPool jvmtiHeapReferenceInfoConstantPool;
+struct _jvmtiHeapReferenceInfoStackLocal;
+typedef struct _jvmtiHeapReferenceInfoStackLocal jvmtiHeapReferenceInfoStackLocal;
+struct _jvmtiHeapReferenceInfoJniLocal;
+typedef struct _jvmtiHeapReferenceInfoJniLocal jvmtiHeapReferenceInfoJniLocal;
+struct _jvmtiHeapReferenceInfoReserved;
+typedef struct _jvmtiHeapReferenceInfoReserved jvmtiHeapReferenceInfoReserved;
+union _jvmtiHeapReferenceInfo;
+typedef union _jvmtiHeapReferenceInfo jvmtiHeapReferenceInfo;
+struct _jvmtiHeapCallbacks;
+typedef struct _jvmtiHeapCallbacks jvmtiHeapCallbacks;
+struct _jvmtiClassDefinition;
+typedef struct _jvmtiClassDefinition jvmtiClassDefinition;
+struct _jvmtiMonitorUsage;
+typedef struct _jvmtiMonitorUsage jvmtiMonitorUsage;
+struct _jvmtiLineNumberEntry;
+typedef struct _jvmtiLineNumberEntry jvmtiLineNumberEntry;
+struct _jvmtiLocalVariableEntry;
+typedef struct _jvmtiLocalVariableEntry jvmtiLocalVariableEntry;
+struct _jvmtiParamInfo;
+typedef struct _jvmtiParamInfo jvmtiParamInfo;
+struct _jvmtiExtensionFunctionInfo;
+typedef struct _jvmtiExtensionFunctionInfo jvmtiExtensionFunctionInfo;
+struct _jvmtiExtensionEventInfo;
+typedef struct _jvmtiExtensionEventInfo jvmtiExtensionEventInfo;
+struct _jvmtiTimerInfo;
+typedef struct _jvmtiTimerInfo jvmtiTimerInfo;
+struct _jvmtiAddrLocationMap;
+typedef struct _jvmtiAddrLocationMap jvmtiAddrLocationMap;
+
+    /* Function Types */
+
+typedef void (JNICALL *jvmtiStartFunction)
+    (jvmtiEnv* jvmti_env, JNIEnv* jni_env, void* arg);
+
+typedef jint (JNICALL *jvmtiHeapIterationCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, jint length, void* user_data);
+
+typedef jint (JNICALL *jvmtiHeapReferenceCallback)
+    (jvmtiHeapReferenceKind reference_kind, const jvmtiHeapReferenceInfo* reference_info, jlong class_tag, jlong referrer_class_tag, jlong size, jlong* tag_ptr, jlong* referrer_tag_ptr, jint length, void* user_data);
+
+typedef jint (JNICALL *jvmtiPrimitiveFieldCallback)
+    (jvmtiHeapReferenceKind kind, const jvmtiHeapReferenceInfo* info, jlong object_class_tag, jlong* object_tag_ptr, jvalue value, jvmtiPrimitiveType value_type, void* user_data);
+
+typedef jint (JNICALL *jvmtiArrayPrimitiveValueCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, jint element_count, jvmtiPrimitiveType element_type, const void* elements, void* user_data);
+
+typedef jint (JNICALL *jvmtiStringPrimitiveValueCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, const jchar* value, jint value_length, void* user_data);
+
+typedef jint (JNICALL *jvmtiReservedCallback)
+    ();
+
+typedef jvmtiIterationControl (JNICALL *jvmtiHeapObjectCallback)
+    (jlong class_tag, jlong size, jlong* tag_ptr, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiHeapRootCallback)
+    (jvmtiHeapRootKind root_kind, jlong class_tag, jlong size, jlong* tag_ptr, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiStackReferenceCallback)
+    (jvmtiHeapRootKind root_kind, jlong class_tag, jlong size, jlong* tag_ptr, jlong thread_tag, jint depth, jmethodID method, jint slot, void* user_data);
+
+typedef jvmtiIterationControl (JNICALL *jvmtiObjectReferenceCallback)
+    (jvmtiObjectReferenceKind reference_kind, jlong class_tag, jlong size, jlong* tag_ptr, jlong referrer_tag, jint referrer_index, void* user_data);
+
+typedef jvmtiError (JNICALL *jvmtiExtensionFunction)
+    (jvmtiEnv* jvmti_env,  ...);
+
+typedef void (JNICALL *jvmtiExtensionEvent)
+    (jvmtiEnv* jvmti_env,  ...);
+
+
+    /* Structure Types */
+struct _jvmtiThreadInfo {
+    char* name;
+    jint priority;
+    jboolean is_daemon;
+    jthreadGroup thread_group;
+    jobject context_class_loader;
+};
+struct _jvmtiMonitorStackDepthInfo {
+    jobject monitor;
+    jint stack_depth;
+};
+struct _jvmtiThreadGroupInfo {
+    jthreadGroup parent;
+    char* name;
+    jint max_priority;
+    jboolean is_daemon;
+};
+struct _jvmtiFrameInfo {
+    jmethodID method;
+    jlocation location;
+};
+struct _jvmtiStackInfo {
+    jthread thread;
+    jint state;
+    jvmtiFrameInfo* frame_buffer;
+    jint frame_count;
+};
+struct _jvmtiHeapReferenceInfoField {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoArray {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoConstantPool {
+    jint index;
+};
+struct _jvmtiHeapReferenceInfoStackLocal {
+    jlong thread_tag;
+    jlong thread_id;
+    jint depth;
+    jmethodID method;
+    jlocation location;
+    jint slot;
+};
+struct _jvmtiHeapReferenceInfoJniLocal {
+    jlong thread_tag;
+    jlong thread_id;
+    jint depth;
+    jmethodID method;
+};
+struct _jvmtiHeapReferenceInfoReserved {
+    jlong reserved1;
+    jlong reserved2;
+    jlong reserved3;
+    jlong reserved4;
+    jlong reserved5;
+    jlong reserved6;
+    jlong reserved7;
+    jlong reserved8;
+};
+union _jvmtiHeapReferenceInfo {
+    jvmtiHeapReferenceInfoField field;
+    jvmtiHeapReferenceInfoArray array;
+    jvmtiHeapReferenceInfoConstantPool constant_pool;
+    jvmtiHeapReferenceInfoStackLocal stack_local;
+    jvmtiHeapReferenceInfoJniLocal jni_local;
+    jvmtiHeapReferenceInfoReserved other;
+};
+struct _jvmtiHeapCallbacks {
+    jvmtiHeapIterationCallback heap_iteration_callback;
+    jvmtiHeapReferenceCallback heap_reference_callback;
+    jvmtiPrimitiveFieldCallback primitive_field_callback;
+    jvmtiArrayPrimitiveValueCallback array_primitive_value_callback;
+    jvmtiStringPrimitiveValueCallback string_primitive_value_callback;
+    jvmtiReservedCallback reserved5;
+    jvmtiReservedCallback reserved6;
+    jvmtiReservedCallback reserved7;
+    jvmtiReservedCallback reserved8;
+    jvmtiReservedCallback reserved9;
+    jvmtiReservedCallback reserved10;
+    jvmtiReservedCallback reserved11;
+    jvmtiReservedCallback reserved12;
+    jvmtiReservedCallback reserved13;
+    jvmtiReservedCallback reserved14;
+    jvmtiReservedCallback reserved15;
+};
+struct _jvmtiClassDefinition {
+    jclass klass;
+    jint class_byte_count;
+    const unsigned char* class_bytes;
+};
+struct _jvmtiMonitorUsage {
+    jthread owner;
+    jint entry_count;
+    jint waiter_count;
+    jthread* waiters;
+    jint notify_waiter_count;
+    jthread* notify_waiters;
+};
+struct _jvmtiLineNumberEntry {
+    jlocation start_location;
+    jint line_number;
+};
+struct _jvmtiLocalVariableEntry {
+    jlocation start_location;
+    jint length;
+    char* name;
+    char* signature;
+    char* generic_signature;
+    jint slot;
+};
+struct _jvmtiParamInfo {
+    char* name;
+    jvmtiParamKind kind;
+    jvmtiParamTypes base_type;
+    jboolean null_ok;
+};
+struct _jvmtiExtensionFunctionInfo {
+    jvmtiExtensionFunction func;
+    char* id;
+    char* short_description;
+    jint param_count;
+    jvmtiParamInfo* params;
+    jint error_count;
+    jvmtiError* errors;
+};
+struct _jvmtiExtensionEventInfo {
+    jint extension_event_index;
+    char* id;
+    char* short_description;
+    jint param_count;
+    jvmtiParamInfo* params;
+};
+struct _jvmtiTimerInfo {
+    jlong max_value;
+    jboolean may_skip_forward;
+    jboolean may_skip_backward;
+    jvmtiTimerKind kind;
+    jlong reserved1;
+    jlong reserved2;
+};
+struct _jvmtiAddrLocationMap {
+    const void* start_address;
+    jlocation location;
+};
+
+typedef struct {
+    unsigned int can_tag_objects : 1;
+    unsigned int can_generate_field_modification_events : 1;
+    unsigned int can_generate_field_access_events : 1;
+    unsigned int can_get_bytecodes : 1;
+    unsigned int can_get_synthetic_attribute : 1;
+    unsigned int can_get_owned_monitor_info : 1;
+    unsigned int can_get_current_contended_monitor : 1;
+    unsigned int can_get_monitor_info : 1;
+    unsigned int can_pop_frame : 1;
+    unsigned int can_redefine_classes : 1;
+    unsigned int can_signal_thread : 1;
+    unsigned int can_get_source_file_name : 1;
+    unsigned int can_get_line_numbers : 1;
+    unsigned int can_get_source_debug_extension : 1;
+    unsigned int can_access_local_variables : 1;
+    unsigned int can_maintain_original_method_order : 1;
+    unsigned int can_generate_single_step_events : 1;
+    unsigned int can_generate_exception_events : 1;
+    unsigned int can_generate_frame_pop_events : 1;
+    unsigned int can_generate_breakpoint_events : 1;
+    unsigned int can_suspend : 1;
+    unsigned int can_redefine_any_class : 1;
+    unsigned int can_get_current_thread_cpu_time : 1;
+    unsigned int can_get_thread_cpu_time : 1;
+    unsigned int can_generate_method_entry_events : 1;
+    unsigned int can_generate_method_exit_events : 1;
+    unsigned int can_generate_all_class_hook_events : 1;
+    unsigned int can_generate_compiled_method_load_events : 1;
+    unsigned int can_generate_monitor_events : 1;
+    unsigned int can_generate_vm_object_alloc_events : 1;
+    unsigned int can_generate_native_method_bind_events : 1;
+    unsigned int can_generate_garbage_collection_events : 1;
+    unsigned int can_generate_object_free_events : 1;
+    unsigned int can_force_early_return : 1;
+    unsigned int can_get_owned_monitor_stack_depth_info : 1;
+    unsigned int can_get_constant_pool : 1;
+    unsigned int can_set_native_method_prefix : 1;
+    unsigned int can_retransform_classes : 1;
+    unsigned int can_retransform_any_class : 1;
+    unsigned int can_generate_resource_exhaustion_heap_events : 1;
+    unsigned int can_generate_resource_exhaustion_threads_events : 1;
+    unsigned int : 7;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+    unsigned int : 16;
+} jvmtiCapabilities;
+
+
+    /* Event Definitions */
+
+typedef void (JNICALL *jvmtiEventReserved)(void);
+
+
+typedef void (JNICALL *jvmtiEventBreakpoint)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location);
+
+typedef void (JNICALL *jvmtiEventClassFileLoadHook)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jclass class_being_redefined,
+     jobject loader,
+     const char* name,
+     jobject protection_domain,
+     jint class_data_len,
+     const unsigned char* class_data,
+     jint* new_class_data_len,
+     unsigned char** new_class_data);
+
+typedef void (JNICALL *jvmtiEventClassLoad)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jclass klass);
+
+typedef void (JNICALL *jvmtiEventClassPrepare)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jclass klass);
+
+typedef void (JNICALL *jvmtiEventCompiledMethodLoad)
+    (jvmtiEnv *jvmti_env,
+     jmethodID method,
+     jint code_size,
+     const void* code_addr,
+     jint map_length,
+     const jvmtiAddrLocationMap* map,
+     const void* compile_info);
+
+typedef void (JNICALL *jvmtiEventCompiledMethodUnload)
+    (jvmtiEnv *jvmti_env,
+     jmethodID method,
+     const void* code_addr);
+
+typedef void (JNICALL *jvmtiEventDataDumpRequest)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventDynamicCodeGenerated)
+    (jvmtiEnv *jvmti_env,
+     const char* name,
+     const void* address,
+     jint length);
+
+typedef void (JNICALL *jvmtiEventException)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jobject exception,
+     jmethodID catch_method,
+     jlocation catch_location);
+
+typedef void (JNICALL *jvmtiEventExceptionCatch)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jobject exception);
+
+typedef void (JNICALL *jvmtiEventFieldAccess)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jclass field_klass,
+     jobject object,
+     jfieldID field);
+
+typedef void (JNICALL *jvmtiEventFieldModification)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location,
+     jclass field_klass,
+     jobject object,
+     jfieldID field,
+     char signature_type,
+     jvalue new_value);
+
+typedef void (JNICALL *jvmtiEventFramePop)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jboolean was_popped_by_exception);
+
+typedef void (JNICALL *jvmtiEventGarbageCollectionFinish)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventGarbageCollectionStart)
+    (jvmtiEnv *jvmti_env);
+
+typedef void (JNICALL *jvmtiEventMethodEntry)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method);
+
+typedef void (JNICALL *jvmtiEventMethodExit)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jboolean was_popped_by_exception,
+     jvalue return_value);
+
+typedef void (JNICALL *jvmtiEventMonitorContendedEnter)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object);
+
+typedef void (JNICALL *jvmtiEventMonitorContendedEntered)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object);
+
+typedef void (JNICALL *jvmtiEventMonitorWait)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jlong timeout);
+
+typedef void (JNICALL *jvmtiEventMonitorWaited)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jboolean timed_out);
+
+typedef void (JNICALL *jvmtiEventNativeMethodBind)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     void* address,
+     void** new_address_ptr);
+
+typedef void (JNICALL *jvmtiEventObjectFree)
+    (jvmtiEnv *jvmti_env,
+     jlong tag);
+
+typedef void (JNICALL *jvmtiEventResourceExhausted)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jint flags,
+     const void* reserved,
+     const char* description);
+
+typedef void (JNICALL *jvmtiEventSingleStep)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jmethodID method,
+     jlocation location);
+
+typedef void (JNICALL *jvmtiEventThreadEnd)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventThreadStart)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventVMDeath)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env);
+
+typedef void (JNICALL *jvmtiEventVMInit)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread);
+
+typedef void (JNICALL *jvmtiEventVMObjectAlloc)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env,
+     jthread thread,
+     jobject object,
+     jclass object_klass,
+     jlong size);
+
+typedef void (JNICALL *jvmtiEventVMStart)
+    (jvmtiEnv *jvmti_env,
+     JNIEnv* jni_env);
+
+    /* Event Callback Structure */
+
+typedef struct {
+                              /*   50 : VM Initialization Event */
+    jvmtiEventVMInit VMInit;
+                              /*   51 : VM Death Event */
+    jvmtiEventVMDeath VMDeath;
+                              /*   52 : Thread Start */
+    jvmtiEventThreadStart ThreadStart;
+                              /*   53 : Thread End */
+    jvmtiEventThreadEnd ThreadEnd;
+                              /*   54 : Class File Load Hook */
+    jvmtiEventClassFileLoadHook ClassFileLoadHook;
+                              /*   55 : Class Load */
+    jvmtiEventClassLoad ClassLoad;
+                              /*   56 : Class Prepare */
+    jvmtiEventClassPrepare ClassPrepare;
+                              /*   57 : VM Start Event */
+    jvmtiEventVMStart VMStart;
+                              /*   58 : Exception */
+    jvmtiEventException Exception;
+                              /*   59 : Exception Catch */
+    jvmtiEventExceptionCatch ExceptionCatch;
+                              /*   60 : Single Step */
+    jvmtiEventSingleStep SingleStep;
+                              /*   61 : Frame Pop */
+    jvmtiEventFramePop FramePop;
+                              /*   62 : Breakpoint */
+    jvmtiEventBreakpoint Breakpoint;
+                              /*   63 : Field Access */
+    jvmtiEventFieldAccess FieldAccess;
+                              /*   64 : Field Modification */
+    jvmtiEventFieldModification FieldModification;
+                              /*   65 : Method Entry */
+    jvmtiEventMethodEntry MethodEntry;
+                              /*   66 : Method Exit */
+    jvmtiEventMethodExit MethodExit;
+                              /*   67 : Native Method Bind */
+    jvmtiEventNativeMethodBind NativeMethodBind;
+                              /*   68 : Compiled Method Load */
+    jvmtiEventCompiledMethodLoad CompiledMethodLoad;
+                              /*   69 : Compiled Method Unload */
+    jvmtiEventCompiledMethodUnload CompiledMethodUnload;
+                              /*   70 : Dynamic Code Generated */
+    jvmtiEventDynamicCodeGenerated DynamicCodeGenerated;
+                              /*   71 : Data Dump Request */
+    jvmtiEventDataDumpRequest DataDumpRequest;
+                              /*   72 */
+    jvmtiEventReserved reserved72;
+                              /*   73 : Monitor Wait */
+    jvmtiEventMonitorWait MonitorWait;
+                              /*   74 : Monitor Waited */
+    jvmtiEventMonitorWaited MonitorWaited;
+                              /*   75 : Monitor Contended Enter */
+    jvmtiEventMonitorContendedEnter MonitorContendedEnter;
+                              /*   76 : Monitor Contended Entered */
+    jvmtiEventMonitorContendedEntered MonitorContendedEntered;
+                              /*   77 */
+    jvmtiEventReserved reserved77;
+                              /*   78 */
+    jvmtiEventReserved reserved78;
+                              /*   79 */
+    jvmtiEventReserved reserved79;
+                              /*   80 : Resource Exhausted */
+    jvmtiEventResourceExhausted ResourceExhausted;
+                              /*   81 : Garbage Collection Start */
+    jvmtiEventGarbageCollectionStart GarbageCollectionStart;
+                              /*   82 : Garbage Collection Finish */
+    jvmtiEventGarbageCollectionFinish GarbageCollectionFinish;
+                              /*   83 : Object Free */
+    jvmtiEventObjectFree ObjectFree;
+                              /*   84 : VM Object Allocation */
+    jvmtiEventVMObjectAlloc VMObjectAlloc;
+} jvmtiEventCallbacks;
+
+
+    /* Function Interface */
+
+typedef struct jvmtiInterface_1_ {
+
+  /*   1 :  RESERVED */
+  void *reserved1;
+
+  /*   2 : Set Event Notification Mode */
+  jvmtiError (JNICALL *SetEventNotificationMode) (jvmtiEnv* env,
+    jvmtiEventMode mode,
+    jvmtiEvent event_type,
+    jthread event_thread,
+     ...);
+
+  /*   3 :  RESERVED */
+  void *reserved3;
+
+  /*   4 : Get All Threads */
+  jvmtiError (JNICALL *GetAllThreads) (jvmtiEnv* env,
+    jint* threads_count_ptr,
+    jthread** threads_ptr);
+
+  /*   5 : Suspend Thread */
+  jvmtiError (JNICALL *SuspendThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   6 : Resume Thread */
+  jvmtiError (JNICALL *ResumeThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   7 : Stop Thread */
+  jvmtiError (JNICALL *StopThread) (jvmtiEnv* env,
+    jthread thread,
+    jobject exception);
+
+  /*   8 : Interrupt Thread */
+  jvmtiError (JNICALL *InterruptThread) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   9 : Get Thread Info */
+  jvmtiError (JNICALL *GetThreadInfo) (jvmtiEnv* env,
+    jthread thread,
+    jvmtiThreadInfo* info_ptr);
+
+  /*   10 : Get Owned Monitor Info */
+  jvmtiError (JNICALL *GetOwnedMonitorInfo) (jvmtiEnv* env,
+    jthread thread,
+    jint* owned_monitor_count_ptr,
+    jobject** owned_monitors_ptr);
+
+  /*   11 : Get Current Contended Monitor */
+  jvmtiError (JNICALL *GetCurrentContendedMonitor) (jvmtiEnv* env,
+    jthread thread,
+    jobject* monitor_ptr);
+
+  /*   12 : Run Agent Thread */
+  jvmtiError (JNICALL *RunAgentThread) (jvmtiEnv* env,
+    jthread thread,
+    jvmtiStartFunction proc,
+    const void* arg,
+    jint priority);
+
+  /*   13 : Get Top Thread Groups */
+  jvmtiError (JNICALL *GetTopThreadGroups) (jvmtiEnv* env,
+    jint* group_count_ptr,
+    jthreadGroup** groups_ptr);
+
+  /*   14 : Get Thread Group Info */
+  jvmtiError (JNICALL *GetThreadGroupInfo) (jvmtiEnv* env,
+    jthreadGroup group,
+    jvmtiThreadGroupInfo* info_ptr);
+
+  /*   15 : Get Thread Group Children */
+  jvmtiError (JNICALL *GetThreadGroupChildren) (jvmtiEnv* env,
+    jthreadGroup group,
+    jint* thread_count_ptr,
+    jthread** threads_ptr,
+    jint* group_count_ptr,
+    jthreadGroup** groups_ptr);
+
+  /*   16 : Get Frame Count */
+  jvmtiError (JNICALL *GetFrameCount) (jvmtiEnv* env,
+    jthread thread,
+    jint* count_ptr);
+
+  /*   17 : Get Thread State */
+  jvmtiError (JNICALL *GetThreadState) (jvmtiEnv* env,
+    jthread thread,
+    jint* thread_state_ptr);
+
+  /*   18 : Get Current Thread */
+  jvmtiError (JNICALL *GetCurrentThread) (jvmtiEnv* env,
+    jthread* thread_ptr);
+
+  /*   19 : Get Frame Location */
+  jvmtiError (JNICALL *GetFrameLocation) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jmethodID* method_ptr,
+    jlocation* location_ptr);
+
+  /*   20 : Notify Frame Pop */
+  jvmtiError (JNICALL *NotifyFramePop) (jvmtiEnv* env,
+    jthread thread,
+    jint depth);
+
+  /*   21 : Get Local Variable - Object */
+  jvmtiError (JNICALL *GetLocalObject) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jobject* value_ptr);
+
+  /*   22 : Get Local Variable - Int */
+  jvmtiError (JNICALL *GetLocalInt) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jint* value_ptr);
+
+  /*   23 : Get Local Variable - Long */
+  jvmtiError (JNICALL *GetLocalLong) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jlong* value_ptr);
+
+  /*   24 : Get Local Variable - Float */
+  jvmtiError (JNICALL *GetLocalFloat) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jfloat* value_ptr);
+
+  /*   25 : Get Local Variable - Double */
+  jvmtiError (JNICALL *GetLocalDouble) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jdouble* value_ptr);
+
+  /*   26 : Set Local Variable - Object */
+  jvmtiError (JNICALL *SetLocalObject) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jobject value);
+
+  /*   27 : Set Local Variable - Int */
+  jvmtiError (JNICALL *SetLocalInt) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jint value);
+
+  /*   28 : Set Local Variable - Long */
+  jvmtiError (JNICALL *SetLocalLong) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jlong value);
+
+  /*   29 : Set Local Variable - Float */
+  jvmtiError (JNICALL *SetLocalFloat) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jfloat value);
+
+  /*   30 : Set Local Variable - Double */
+  jvmtiError (JNICALL *SetLocalDouble) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jint slot,
+    jdouble value);
+
+  /*   31 : Create Raw Monitor */
+  jvmtiError (JNICALL *CreateRawMonitor) (jvmtiEnv* env,
+    const char* name,
+    jrawMonitorID* monitor_ptr);
+
+  /*   32 : Destroy Raw Monitor */
+  jvmtiError (JNICALL *DestroyRawMonitor) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   33 : Raw Monitor Enter */
+  jvmtiError (JNICALL *RawMonitorEnter) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   34 : Raw Monitor Exit */
+  jvmtiError (JNICALL *RawMonitorExit) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   35 : Raw Monitor Wait */
+  jvmtiError (JNICALL *RawMonitorWait) (jvmtiEnv* env,
+    jrawMonitorID monitor,
+    jlong millis);
+
+  /*   36 : Raw Monitor Notify */
+  jvmtiError (JNICALL *RawMonitorNotify) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   37 : Raw Monitor Notify All */
+  jvmtiError (JNICALL *RawMonitorNotifyAll) (jvmtiEnv* env,
+    jrawMonitorID monitor);
+
+  /*   38 : Set Breakpoint */
+  jvmtiError (JNICALL *SetBreakpoint) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation location);
+
+  /*   39 : Clear Breakpoint */
+  jvmtiError (JNICALL *ClearBreakpoint) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation location);
+
+  /*   40 :  RESERVED */
+  void *reserved40;
+
+  /*   41 : Set Field Access Watch */
+  jvmtiError (JNICALL *SetFieldAccessWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   42 : Clear Field Access Watch */
+  jvmtiError (JNICALL *ClearFieldAccessWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   43 : Set Field Modification Watch */
+  jvmtiError (JNICALL *SetFieldModificationWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   44 : Clear Field Modification Watch */
+  jvmtiError (JNICALL *ClearFieldModificationWatch) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field);
+
+  /*   45 : Is Modifiable Class */
+  jvmtiError (JNICALL *IsModifiableClass) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_modifiable_class_ptr);
+
+  /*   46 : Allocate */
+  jvmtiError (JNICALL *Allocate) (jvmtiEnv* env,
+    jlong size,
+    unsigned char** mem_ptr);
+
+  /*   47 : Deallocate */
+  jvmtiError (JNICALL *Deallocate) (jvmtiEnv* env,
+    unsigned char* mem);
+
+  /*   48 : Get Class Signature */
+  jvmtiError (JNICALL *GetClassSignature) (jvmtiEnv* env,
+    jclass klass,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   49 : Get Class Status */
+  jvmtiError (JNICALL *GetClassStatus) (jvmtiEnv* env,
+    jclass klass,
+    jint* status_ptr);
+
+  /*   50 : Get Source File Name */
+  jvmtiError (JNICALL *GetSourceFileName) (jvmtiEnv* env,
+    jclass klass,
+    char** source_name_ptr);
+
+  /*   51 : Get Class Modifiers */
+  jvmtiError (JNICALL *GetClassModifiers) (jvmtiEnv* env,
+    jclass klass,
+    jint* modifiers_ptr);
+
+  /*   52 : Get Class Methods */
+  jvmtiError (JNICALL *GetClassMethods) (jvmtiEnv* env,
+    jclass klass,
+    jint* method_count_ptr,
+    jmethodID** methods_ptr);
+
+  /*   53 : Get Class Fields */
+  jvmtiError (JNICALL *GetClassFields) (jvmtiEnv* env,
+    jclass klass,
+    jint* field_count_ptr,
+    jfieldID** fields_ptr);
+
+  /*   54 : Get Implemented Interfaces */
+  jvmtiError (JNICALL *GetImplementedInterfaces) (jvmtiEnv* env,
+    jclass klass,
+    jint* interface_count_ptr,
+    jclass** interfaces_ptr);
+
+  /*   55 : Is Interface */
+  jvmtiError (JNICALL *IsInterface) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_interface_ptr);
+
+  /*   56 : Is Array Class */
+  jvmtiError (JNICALL *IsArrayClass) (jvmtiEnv* env,
+    jclass klass,
+    jboolean* is_array_class_ptr);
+
+  /*   57 : Get Class Loader */
+  jvmtiError (JNICALL *GetClassLoader) (jvmtiEnv* env,
+    jclass klass,
+    jobject* classloader_ptr);
+
+  /*   58 : Get Object Hash Code */
+  jvmtiError (JNICALL *GetObjectHashCode) (jvmtiEnv* env,
+    jobject object,
+    jint* hash_code_ptr);
+
+  /*   59 : Get Object Monitor Usage */
+  jvmtiError (JNICALL *GetObjectMonitorUsage) (jvmtiEnv* env,
+    jobject object,
+    jvmtiMonitorUsage* info_ptr);
+
+  /*   60 : Get Field Name (and Signature) */
+  jvmtiError (JNICALL *GetFieldName) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    char** name_ptr,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   61 : Get Field Declaring Class */
+  jvmtiError (JNICALL *GetFieldDeclaringClass) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jclass* declaring_class_ptr);
+
+  /*   62 : Get Field Modifiers */
+  jvmtiError (JNICALL *GetFieldModifiers) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jint* modifiers_ptr);
+
+  /*   63 : Is Field Synthetic */
+  jvmtiError (JNICALL *IsFieldSynthetic) (jvmtiEnv* env,
+    jclass klass,
+    jfieldID field,
+    jboolean* is_synthetic_ptr);
+
+  /*   64 : Get Method Name (and Signature) */
+  jvmtiError (JNICALL *GetMethodName) (jvmtiEnv* env,
+    jmethodID method,
+    char** name_ptr,
+    char** signature_ptr,
+    char** generic_ptr);
+
+  /*   65 : Get Method Declaring Class */
+  jvmtiError (JNICALL *GetMethodDeclaringClass) (jvmtiEnv* env,
+    jmethodID method,
+    jclass* declaring_class_ptr);
+
+  /*   66 : Get Method Modifiers */
+  jvmtiError (JNICALL *GetMethodModifiers) (jvmtiEnv* env,
+    jmethodID method,
+    jint* modifiers_ptr);
+
+  /*   67 :  RESERVED */
+  void *reserved67;
+
+  /*   68 : Get Max Locals */
+  jvmtiError (JNICALL *GetMaxLocals) (jvmtiEnv* env,
+    jmethodID method,
+    jint* max_ptr);
+
+  /*   69 : Get Arguments Size */
+  jvmtiError (JNICALL *GetArgumentsSize) (jvmtiEnv* env,
+    jmethodID method,
+    jint* size_ptr);
+
+  /*   70 : Get Line Number Table */
+  jvmtiError (JNICALL *GetLineNumberTable) (jvmtiEnv* env,
+    jmethodID method,
+    jint* entry_count_ptr,
+    jvmtiLineNumberEntry** table_ptr);
+
+  /*   71 : Get Method Location */
+  jvmtiError (JNICALL *GetMethodLocation) (jvmtiEnv* env,
+    jmethodID method,
+    jlocation* start_location_ptr,
+    jlocation* end_location_ptr);
+
+  /*   72 : Get Local Variable Table */
+  jvmtiError (JNICALL *GetLocalVariableTable) (jvmtiEnv* env,
+    jmethodID method,
+    jint* entry_count_ptr,
+    jvmtiLocalVariableEntry** table_ptr);
+
+  /*   73 : Set Native Method Prefix */
+  jvmtiError (JNICALL *SetNativeMethodPrefix) (jvmtiEnv* env,
+    const char* prefix);
+
+  /*   74 : Set Native Method Prefixes */
+  jvmtiError (JNICALL *SetNativeMethodPrefixes) (jvmtiEnv* env,
+    jint prefix_count,
+    char** prefixes);
+
+  /*   75 : Get Bytecodes */
+  jvmtiError (JNICALL *GetBytecodes) (jvmtiEnv* env,
+    jmethodID method,
+    jint* bytecode_count_ptr,
+    unsigned char** bytecodes_ptr);
+
+  /*   76 : Is Method Native */
+  jvmtiError (JNICALL *IsMethodNative) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_native_ptr);
+
+  /*   77 : Is Method Synthetic */
+  jvmtiError (JNICALL *IsMethodSynthetic) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_synthetic_ptr);
+
+  /*   78 : Get Loaded Classes */
+  jvmtiError (JNICALL *GetLoadedClasses) (jvmtiEnv* env,
+    jint* class_count_ptr,
+    jclass** classes_ptr);
+
+  /*   79 : Get Classloader Classes */
+  jvmtiError (JNICALL *GetClassLoaderClasses) (jvmtiEnv* env,
+    jobject initiating_loader,
+    jint* class_count_ptr,
+    jclass** classes_ptr);
+
+  /*   80 : Pop Frame */
+  jvmtiError (JNICALL *PopFrame) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   81 : Force Early Return - Object */
+  jvmtiError (JNICALL *ForceEarlyReturnObject) (jvmtiEnv* env,
+    jthread thread,
+    jobject value);
+
+  /*   82 : Force Early Return - Int */
+  jvmtiError (JNICALL *ForceEarlyReturnInt) (jvmtiEnv* env,
+    jthread thread,
+    jint value);
+
+  /*   83 : Force Early Return - Long */
+  jvmtiError (JNICALL *ForceEarlyReturnLong) (jvmtiEnv* env,
+    jthread thread,
+    jlong value);
+
+  /*   84 : Force Early Return - Float */
+  jvmtiError (JNICALL *ForceEarlyReturnFloat) (jvmtiEnv* env,
+    jthread thread,
+    jfloat value);
+
+  /*   85 : Force Early Return - Double */
+  jvmtiError (JNICALL *ForceEarlyReturnDouble) (jvmtiEnv* env,
+    jthread thread,
+    jdouble value);
+
+  /*   86 : Force Early Return - Void */
+  jvmtiError (JNICALL *ForceEarlyReturnVoid) (jvmtiEnv* env,
+    jthread thread);
+
+  /*   87 : Redefine Classes */
+  jvmtiError (JNICALL *RedefineClasses) (jvmtiEnv* env,
+    jint class_count,
+    const jvmtiClassDefinition* class_definitions);
+
+  /*   88 : Get Version Number */
+  jvmtiError (JNICALL *GetVersionNumber) (jvmtiEnv* env,
+    jint* version_ptr);
+
+  /*   89 : Get Capabilities */
+  jvmtiError (JNICALL *GetCapabilities) (jvmtiEnv* env,
+    jvmtiCapabilities* capabilities_ptr);
+
+  /*   90 : Get Source Debug Extension */
+  jvmtiError (JNICALL *GetSourceDebugExtension) (jvmtiEnv* env,
+    jclass klass,
+    char** source_debug_extension_ptr);
+
+  /*   91 : Is Method Obsolete */
+  jvmtiError (JNICALL *IsMethodObsolete) (jvmtiEnv* env,
+    jmethodID method,
+    jboolean* is_obsolete_ptr);
+
+  /*   92 : Suspend Thread List */
+  jvmtiError (JNICALL *SuspendThreadList) (jvmtiEnv* env,
+    jint request_count,
+    const jthread* request_list,
+    jvmtiError* results);
+
+  /*   93 : Resume Thread List */
+  jvmtiError (JNICALL *ResumeThreadList) (jvmtiEnv* env,
+    jint request_count,
+    const jthread* request_list,
+    jvmtiError* results);
+
+  /*   94 :  RESERVED */
+  void *reserved94;
+
+  /*   95 :  RESERVED */
+  void *reserved95;
+
+  /*   96 :  RESERVED */
+  void *reserved96;
+
+  /*   97 :  RESERVED */
+  void *reserved97;
+
+  /*   98 :  RESERVED */
+  void *reserved98;
+
+  /*   99 :  RESERVED */
+  void *reserved99;
+
+  /*   100 : Get All Stack Traces */
+  jvmtiError (JNICALL *GetAllStackTraces) (jvmtiEnv* env,
+    jint max_frame_count,
+    jvmtiStackInfo** stack_info_ptr,
+    jint* thread_count_ptr);
+
+  /*   101 : Get Thread List Stack Traces */
+  jvmtiError (JNICALL *GetThreadListStackTraces) (jvmtiEnv* env,
+    jint thread_count,
+    const jthread* thread_list,
+    jint max_frame_count,
+    jvmtiStackInfo** stack_info_ptr);
+
+  /*   102 : Get Thread Local Storage */
+  jvmtiError (JNICALL *GetThreadLocalStorage) (jvmtiEnv* env,
+    jthread thread,
+    void** data_ptr);
+
+  /*   103 : Set Thread Local Storage */
+  jvmtiError (JNICALL *SetThreadLocalStorage) (jvmtiEnv* env,
+    jthread thread,
+    const void* data);
+
+  /*   104 : Get Stack Trace */
+  jvmtiError (JNICALL *GetStackTrace) (jvmtiEnv* env,
+    jthread thread,
+    jint start_depth,
+    jint max_frame_count,
+    jvmtiFrameInfo* frame_buffer,
+    jint* count_ptr);
+
+  /*   105 :  RESERVED */
+  void *reserved105;
+
+  /*   106 : Get Tag */
+  jvmtiError (JNICALL *GetTag) (jvmtiEnv* env,
+    jobject object,
+    jlong* tag_ptr);
+
+  /*   107 : Set Tag */
+  jvmtiError (JNICALL *SetTag) (jvmtiEnv* env,
+    jobject object,
+    jlong tag);
+
+  /*   108 : Force Garbage Collection */
+  jvmtiError (JNICALL *ForceGarbageCollection) (jvmtiEnv* env);
+
+  /*   109 : Iterate Over Objects Reachable From Object */
+  jvmtiError (JNICALL *IterateOverObjectsReachableFromObject) (jvmtiEnv* env,
+    jobject object,
+    jvmtiObjectReferenceCallback object_reference_callback,
+    const void* user_data);
+
+  /*   110 : Iterate Over Reachable Objects */
+  jvmtiError (JNICALL *IterateOverReachableObjects) (jvmtiEnv* env,
+    jvmtiHeapRootCallback heap_root_callback,
+    jvmtiStackReferenceCallback stack_ref_callback,
+    jvmtiObjectReferenceCallback object_ref_callback,
+    const void* user_data);
+
+  /*   111 : Iterate Over Heap */
+  jvmtiError (JNICALL *IterateOverHeap) (jvmtiEnv* env,
+    jvmtiHeapObjectFilter object_filter,
+    jvmtiHeapObjectCallback heap_object_callback,
+    const void* user_data);
+
+  /*   112 : Iterate Over Instances Of Class */
+  jvmtiError (JNICALL *IterateOverInstancesOfClass) (jvmtiEnv* env,
+    jclass klass,
+    jvmtiHeapObjectFilter object_filter,
+    jvmtiHeapObjectCallback heap_object_callback,
+    const void* user_data);
+
+  /*   113 :  RESERVED */
+  void *reserved113;
+
+  /*   114 : Get Objects With Tags */
+  jvmtiError (JNICALL *GetObjectsWithTags) (jvmtiEnv* env,
+    jint tag_count,
+    const jlong* tags,
+    jint* count_ptr,
+    jobject** object_result_ptr,
+    jlong** tag_result_ptr);
+
+  /*   115 : Follow References */
+  jvmtiError (JNICALL *FollowReferences) (jvmtiEnv* env,
+    jint heap_filter,
+    jclass klass,
+    jobject initial_object,
+    const jvmtiHeapCallbacks* callbacks,
+    const void* user_data);
+
+  /*   116 : Iterate Through Heap */
+  jvmtiError (JNICALL *IterateThroughHeap) (jvmtiEnv* env,
+    jint heap_filter,
+    jclass klass,
+    const jvmtiHeapCallbacks* callbacks,
+    const void* user_data);
+
+  /*   117 :  RESERVED */
+  void *reserved117;
+
+  /*   118 :  RESERVED */
+  void *reserved118;
+
+  /*   119 :  RESERVED */
+  void *reserved119;
+
+  /*   120 : Set JNI Function Table */
+  jvmtiError (JNICALL *SetJNIFunctionTable) (jvmtiEnv* env,
+    const jniNativeInterface* function_table);
+
+  /*   121 : Get JNI Function Table */
+  jvmtiError (JNICALL *GetJNIFunctionTable) (jvmtiEnv* env,
+    jniNativeInterface** function_table);
+
+  /*   122 : Set Event Callbacks */
+  jvmtiError (JNICALL *SetEventCallbacks) (jvmtiEnv* env,
+    const jvmtiEventCallbacks* callbacks,
+    jint size_of_callbacks);
+
+  /*   123 : Generate Events */
+  jvmtiError (JNICALL *GenerateEvents) (jvmtiEnv* env,
+    jvmtiEvent event_type);
+
+  /*   124 : Get Extension Functions */
+  jvmtiError (JNICALL *GetExtensionFunctions) (jvmtiEnv* env,
+    jint* extension_count_ptr,
+    jvmtiExtensionFunctionInfo** extensions);
+
+  /*   125 : Get Extension Events */
+  jvmtiError (JNICALL *GetExtensionEvents) (jvmtiEnv* env,
+    jint* extension_count_ptr,
+    jvmtiExtensionEventInfo** extensions);
+
+  /*   126 : Set Extension Event Callback */
+  jvmtiError (JNICALL *SetExtensionEventCallback) (jvmtiEnv* env,
+    jint extension_event_index,
+    jvmtiExtensionEvent callback);
+
+  /*   127 : Dispose Environment */
+  jvmtiError (JNICALL *DisposeEnvironment) (jvmtiEnv* env);
+
+  /*   128 : Get Error Name */
+  jvmtiError (JNICALL *GetErrorName) (jvmtiEnv* env,
+    jvmtiError error,
+    char** name_ptr);
+
+  /*   129 : Get JLocation Format */
+  jvmtiError (JNICALL *GetJLocationFormat) (jvmtiEnv* env,
+    jvmtiJlocationFormat* format_ptr);
+
+  /*   130 : Get System Properties */
+  jvmtiError (JNICALL *GetSystemProperties) (jvmtiEnv* env,
+    jint* count_ptr,
+    char*** property_ptr);
+
+  /*   131 : Get System Property */
+  jvmtiError (JNICALL *GetSystemProperty) (jvmtiEnv* env,
+    const char* property,
+    char** value_ptr);
+
+  /*   132 : Set System Property */
+  jvmtiError (JNICALL *SetSystemProperty) (jvmtiEnv* env,
+    const char* property,
+    const char* value);
+
+  /*   133 : Get Phase */
+  jvmtiError (JNICALL *GetPhase) (jvmtiEnv* env,
+    jvmtiPhase* phase_ptr);
+
+  /*   134 : Get Current Thread CPU Timer Information */
+  jvmtiError (JNICALL *GetCurrentThreadCpuTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   135 : Get Current Thread CPU Time */
+  jvmtiError (JNICALL *GetCurrentThreadCpuTime) (jvmtiEnv* env,
+    jlong* nanos_ptr);
+
+  /*   136 : Get Thread CPU Timer Information */
+  jvmtiError (JNICALL *GetThreadCpuTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   137 : Get Thread CPU Time */
+  jvmtiError (JNICALL *GetThreadCpuTime) (jvmtiEnv* env,
+    jthread thread,
+    jlong* nanos_ptr);
+
+  /*   138 : Get Timer Information */
+  jvmtiError (JNICALL *GetTimerInfo) (jvmtiEnv* env,
+    jvmtiTimerInfo* info_ptr);
+
+  /*   139 : Get Time */
+  jvmtiError (JNICALL *GetTime) (jvmtiEnv* env,
+    jlong* nanos_ptr);
+
+  /*   140 : Get Potential Capabilities */
+  jvmtiError (JNICALL *GetPotentialCapabilities) (jvmtiEnv* env,
+    jvmtiCapabilities* capabilities_ptr);
+
+  /*   141 :  RESERVED */
+  void *reserved141;
+
+  /*   142 : Add Capabilities */
+  jvmtiError (JNICALL *AddCapabilities) (jvmtiEnv* env,
+    const jvmtiCapabilities* capabilities_ptr);
+
+  /*   143 : Relinquish Capabilities */
+  jvmtiError (JNICALL *RelinquishCapabilities) (jvmtiEnv* env,
+    const jvmtiCapabilities* capabilities_ptr);
+
+  /*   144 : Get Available Processors */
+  jvmtiError (JNICALL *GetAvailableProcessors) (jvmtiEnv* env,
+    jint* processor_count_ptr);
+
+  /*   145 : Get Class Version Numbers */
+  jvmtiError (JNICALL *GetClassVersionNumbers) (jvmtiEnv* env,
+    jclass klass,
+    jint* minor_version_ptr,
+    jint* major_version_ptr);
+
+  /*   146 : Get Constant Pool */
+  jvmtiError (JNICALL *GetConstantPool) (jvmtiEnv* env,
+    jclass klass,
+    jint* constant_pool_count_ptr,
+    jint* constant_pool_byte_count_ptr,
+    unsigned char** constant_pool_bytes_ptr);
+
+  /*   147 : Get Environment Local Storage */
+  jvmtiError (JNICALL *GetEnvironmentLocalStorage) (jvmtiEnv* env,
+    void** data_ptr);
+
+  /*   148 : Set Environment Local Storage */
+  jvmtiError (JNICALL *SetEnvironmentLocalStorage) (jvmtiEnv* env,
+    const void* data);
+
+  /*   149 : Add To Bootstrap Class Loader Search */
+  jvmtiError (JNICALL *AddToBootstrapClassLoaderSearch) (jvmtiEnv* env,
+    const char* segment);
+
+  /*   150 : Set Verbose Flag */
+  jvmtiError (JNICALL *SetVerboseFlag) (jvmtiEnv* env,
+    jvmtiVerboseFlag flag,
+    jboolean value);
+
+  /*   151 : Add To System Class Loader Search */
+  jvmtiError (JNICALL *AddToSystemClassLoaderSearch) (jvmtiEnv* env,
+    const char* segment);
+
+  /*   152 : Retransform Classes */
+  jvmtiError (JNICALL *RetransformClasses) (jvmtiEnv* env,
+    jint class_count,
+    const jclass* classes);
+
+  /*   153 : Get Owned Monitor Stack Depth Info */
+  jvmtiError (JNICALL *GetOwnedMonitorStackDepthInfo) (jvmtiEnv* env,
+    jthread thread,
+    jint* monitor_info_count_ptr,
+    jvmtiMonitorStackDepthInfo** monitor_info_ptr);
+
+  /*   154 : Get Object Size */
+  jvmtiError (JNICALL *GetObjectSize) (jvmtiEnv* env,
+    jobject object,
+    jlong* size_ptr);
+
+  /*   155 : Get Local Instance */
+  jvmtiError (JNICALL *GetLocalInstance) (jvmtiEnv* env,
+    jthread thread,
+    jint depth,
+    jobject* value_ptr);
+
+} jvmtiInterface_1;
+
+struct _jvmtiEnv {
+    const struct jvmtiInterface_1_ *functions;
+#ifdef __cplusplus
+
+
+  jvmtiError Allocate(jlong size,
+            unsigned char** mem_ptr) {
+    return functions->Allocate(this, size, mem_ptr);
+  }
+
+  jvmtiError Deallocate(unsigned char* mem) {
+    return functions->Deallocate(this, mem);
+  }
+
+  jvmtiError GetThreadState(jthread thread,
+            jint* thread_state_ptr) {
+    return functions->GetThreadState(this, thread, thread_state_ptr);
+  }
+
+  jvmtiError GetCurrentThread(jthread* thread_ptr) {
+    return functions->GetCurrentThread(this, thread_ptr);
+  }
+
+  jvmtiError GetAllThreads(jint* threads_count_ptr,
+            jthread** threads_ptr) {
+    return functions->GetAllThreads(this, threads_count_ptr, threads_ptr);
+  }
+
+  jvmtiError SuspendThread(jthread thread) {
+    return functions->SuspendThread(this, thread);
+  }
+
+  jvmtiError SuspendThreadList(jint request_count,
+            const jthread* request_list,
+            jvmtiError* results) {
+    return functions->SuspendThreadList(this, request_count, request_list, results);
+  }
+
+  jvmtiError ResumeThread(jthread thread) {
+    return functions->ResumeThread(this, thread);
+  }
+
+  jvmtiError ResumeThreadList(jint request_count,
+            const jthread* request_list,
+            jvmtiError* results) {
+    return functions->ResumeThreadList(this, request_count, request_list, results);
+  }
+
+  jvmtiError StopThread(jthread thread,
+            jobject exception) {
+    return functions->StopThread(this, thread, exception);
+  }
+
+  jvmtiError InterruptThread(jthread thread) {
+    return functions->InterruptThread(this, thread);
+  }
+
+  jvmtiError GetThreadInfo(jthread thread,
+            jvmtiThreadInfo* info_ptr) {
+    return functions->GetThreadInfo(this, thread, info_ptr);
+  }
+
+  jvmtiError GetOwnedMonitorInfo(jthread thread,
+            jint* owned_monitor_count_ptr,
+            jobject** owned_monitors_ptr) {
+    return functions->GetOwnedMonitorInfo(this, thread, owned_monitor_count_ptr, owned_monitors_ptr);
+  }
+
+  jvmtiError GetOwnedMonitorStackDepthInfo(jthread thread,
+            jint* monitor_info_count_ptr,
+            jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+    return functions->GetOwnedMonitorStackDepthInfo(this, thread, monitor_info_count_ptr, monitor_info_ptr);
+  }
+
+  jvmtiError GetCurrentContendedMonitor(jthread thread,
+            jobject* monitor_ptr) {
+    return functions->GetCurrentContendedMonitor(this, thread, monitor_ptr);
+  }
+
+  jvmtiError RunAgentThread(jthread thread,
+            jvmtiStartFunction proc,
+            const void* arg,
+            jint priority) {
+    return functions->RunAgentThread(this, thread, proc, arg, priority);
+  }
+
+  jvmtiError SetThreadLocalStorage(jthread thread,
+            const void* data) {
+    return functions->SetThreadLocalStorage(this, thread, data);
+  }
+
+  jvmtiError GetThreadLocalStorage(jthread thread,
+            void** data_ptr) {
+    return functions->GetThreadLocalStorage(this, thread, data_ptr);
+  }
+
+  jvmtiError GetTopThreadGroups(jint* group_count_ptr,
+            jthreadGroup** groups_ptr) {
+    return functions->GetTopThreadGroups(this, group_count_ptr, groups_ptr);
+  }
+
+  jvmtiError GetThreadGroupInfo(jthreadGroup group,
+            jvmtiThreadGroupInfo* info_ptr) {
+    return functions->GetThreadGroupInfo(this, group, info_ptr);
+  }
+
+  jvmtiError GetThreadGroupChildren(jthreadGroup group,
+            jint* thread_count_ptr,
+            jthread** threads_ptr,
+            jint* group_count_ptr,
+            jthreadGroup** groups_ptr) {
+    return functions->GetThreadGroupChildren(this, group, thread_count_ptr, threads_ptr, group_count_ptr, groups_ptr);
+  }
+
+  jvmtiError GetStackTrace(jthread thread,
+            jint start_depth,
+            jint max_frame_count,
+            jvmtiFrameInfo* frame_buffer,
+            jint* count_ptr) {
+    return functions->GetStackTrace(this, thread, start_depth, max_frame_count, frame_buffer, count_ptr);
+  }
+
+  jvmtiError GetAllStackTraces(jint max_frame_count,
+            jvmtiStackInfo** stack_info_ptr,
+            jint* thread_count_ptr) {
+    return functions->GetAllStackTraces(this, max_frame_count, stack_info_ptr, thread_count_ptr);
+  }
+
+  jvmtiError GetThreadListStackTraces(jint thread_count,
+            const jthread* thread_list,
+            jint max_frame_count,
+            jvmtiStackInfo** stack_info_ptr) {
+    return functions->GetThreadListStackTraces(this, thread_count, thread_list, max_frame_count, stack_info_ptr);
+  }
+
+  jvmtiError GetFrameCount(jthread thread,
+            jint* count_ptr) {
+    return functions->GetFrameCount(this, thread, count_ptr);
+  }
+
+  jvmtiError PopFrame(jthread thread) {
+    return functions->PopFrame(this, thread);
+  }
+
+  jvmtiError GetFrameLocation(jthread thread,
+            jint depth,
+            jmethodID* method_ptr,
+            jlocation* location_ptr) {
+    return functions->GetFrameLocation(this, thread, depth, method_ptr, location_ptr);
+  }
+
+  jvmtiError NotifyFramePop(jthread thread,
+            jint depth) {
+    return functions->NotifyFramePop(this, thread, depth);
+  }
+
+  jvmtiError ForceEarlyReturnObject(jthread thread,
+            jobject value) {
+    return functions->ForceEarlyReturnObject(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnInt(jthread thread,
+            jint value) {
+    return functions->ForceEarlyReturnInt(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnLong(jthread thread,
+            jlong value) {
+    return functions->ForceEarlyReturnLong(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnFloat(jthread thread,
+            jfloat value) {
+    return functions->ForceEarlyReturnFloat(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnDouble(jthread thread,
+            jdouble value) {
+    return functions->ForceEarlyReturnDouble(this, thread, value);
+  }
+
+  jvmtiError ForceEarlyReturnVoid(jthread thread) {
+    return functions->ForceEarlyReturnVoid(this, thread);
+  }
+
+  jvmtiError FollowReferences(jint heap_filter,
+            jclass klass,
+            jobject initial_object,
+            const jvmtiHeapCallbacks* callbacks,
+            const void* user_data) {
+    return functions->FollowReferences(this, heap_filter, klass, initial_object, callbacks, user_data);
+  }
+
+  jvmtiError IterateThroughHeap(jint heap_filter,
+            jclass klass,
+            const jvmtiHeapCallbacks* callbacks,
+            const void* user_data) {
+    return functions->IterateThroughHeap(this, heap_filter, klass, callbacks, user_data);
+  }
+
+  jvmtiError GetTag(jobject object,
+            jlong* tag_ptr) {
+    return functions->GetTag(this, object, tag_ptr);
+  }
+
+  jvmtiError SetTag(jobject object,
+            jlong tag) {
+    return functions->SetTag(this, object, tag);
+  }
+
+  jvmtiError GetObjectsWithTags(jint tag_count,
+            const jlong* tags,
+            jint* count_ptr,
+            jobject** object_result_ptr,
+            jlong** tag_result_ptr) {
+    return functions->GetObjectsWithTags(this, tag_count, tags, count_ptr, object_result_ptr, tag_result_ptr);
+  }
+
+  jvmtiError ForceGarbageCollection() {
+    return functions->ForceGarbageCollection(this);
+  }
+
+  jvmtiError IterateOverObjectsReachableFromObject(jobject object,
+            jvmtiObjectReferenceCallback object_reference_callback,
+            const void* user_data) {
+    return functions->IterateOverObjectsReachableFromObject(this, object, object_reference_callback, user_data);
+  }
+
+  jvmtiError IterateOverReachableObjects(jvmtiHeapRootCallback heap_root_callback,
+            jvmtiStackReferenceCallback stack_ref_callback,
+            jvmtiObjectReferenceCallback object_ref_callback,
+            const void* user_data) {
+    return functions->IterateOverReachableObjects(this, heap_root_callback, stack_ref_callback, object_ref_callback, user_data);
+  }
+
+  jvmtiError IterateOverHeap(jvmtiHeapObjectFilter object_filter,
+            jvmtiHeapObjectCallback heap_object_callback,
+            const void* user_data) {
+    return functions->IterateOverHeap(this, object_filter, heap_object_callback, user_data);
+  }
+
+  jvmtiError IterateOverInstancesOfClass(jclass klass,
+            jvmtiHeapObjectFilter object_filter,
+            jvmtiHeapObjectCallback heap_object_callback,
+            const void* user_data) {
+    return functions->IterateOverInstancesOfClass(this, klass, object_filter, heap_object_callback, user_data);
+  }
+
+  jvmtiError GetLocalObject(jthread thread,
+            jint depth,
+            jint slot,
+            jobject* value_ptr) {
+    return functions->GetLocalObject(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalInstance(jthread thread,
+            jint depth,
+            jobject* value_ptr) {
+    return functions->GetLocalInstance(this, thread, depth, value_ptr);
+  }
+
+  jvmtiError GetLocalInt(jthread thread,
+            jint depth,
+            jint slot,
+            jint* value_ptr) {
+    return functions->GetLocalInt(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalLong(jthread thread,
+            jint depth,
+            jint slot,
+            jlong* value_ptr) {
+    return functions->GetLocalLong(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalFloat(jthread thread,
+            jint depth,
+            jint slot,
+            jfloat* value_ptr) {
+    return functions->GetLocalFloat(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError GetLocalDouble(jthread thread,
+            jint depth,
+            jint slot,
+            jdouble* value_ptr) {
+    return functions->GetLocalDouble(this, thread, depth, slot, value_ptr);
+  }
+
+  jvmtiError SetLocalObject(jthread thread,
+            jint depth,
+            jint slot,
+            jobject value) {
+    return functions->SetLocalObject(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalInt(jthread thread,
+            jint depth,
+            jint slot,
+            jint value) {
+    return functions->SetLocalInt(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalLong(jthread thread,
+            jint depth,
+            jint slot,
+            jlong value) {
+    return functions->SetLocalLong(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalFloat(jthread thread,
+            jint depth,
+            jint slot,
+            jfloat value) {
+    return functions->SetLocalFloat(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetLocalDouble(jthread thread,
+            jint depth,
+            jint slot,
+            jdouble value) {
+    return functions->SetLocalDouble(this, thread, depth, slot, value);
+  }
+
+  jvmtiError SetBreakpoint(jmethodID method,
+            jlocation location) {
+    return functions->SetBreakpoint(this, method, location);
+  }
+
+  jvmtiError ClearBreakpoint(jmethodID method,
+            jlocation location) {
+    return functions->ClearBreakpoint(this, method, location);
+  }
+
+  jvmtiError SetFieldAccessWatch(jclass klass,
+            jfieldID field) {
+    return functions->SetFieldAccessWatch(this, klass, field);
+  }
+
+  jvmtiError ClearFieldAccessWatch(jclass klass,
+            jfieldID field) {
+    return functions->ClearFieldAccessWatch(this, klass, field);
+  }
+
+  jvmtiError SetFieldModificationWatch(jclass klass,
+            jfieldID field) {
+    return functions->SetFieldModificationWatch(this, klass, field);
+  }
+
+  jvmtiError ClearFieldModificationWatch(jclass klass,
+            jfieldID field) {
+    return functions->ClearFieldModificationWatch(this, klass, field);
+  }
+
+  jvmtiError GetLoadedClasses(jint* class_count_ptr,
+            jclass** classes_ptr) {
+    return functions->GetLoadedClasses(this, class_count_ptr, classes_ptr);
+  }
+
+  jvmtiError GetClassLoaderClasses(jobject initiating_loader,
+            jint* class_count_ptr,
+            jclass** classes_ptr) {
+    return functions->GetClassLoaderClasses(this, initiating_loader, class_count_ptr, classes_ptr);
+  }
+
+  jvmtiError GetClassSignature(jclass klass,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetClassSignature(this, klass, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetClassStatus(jclass klass,
+            jint* status_ptr) {
+    return functions->GetClassStatus(this, klass, status_ptr);
+  }
+
+  jvmtiError GetSourceFileName(jclass klass,
+            char** source_name_ptr) {
+    return functions->GetSourceFileName(this, klass, source_name_ptr);
+  }
+
+  jvmtiError GetClassModifiers(jclass klass,
+            jint* modifiers_ptr) {
+    return functions->GetClassModifiers(this, klass, modifiers_ptr);
+  }
+
+  jvmtiError GetClassMethods(jclass klass,
+            jint* method_count_ptr,
+            jmethodID** methods_ptr) {
+    return functions->GetClassMethods(this, klass, method_count_ptr, methods_ptr);
+  }
+
+  jvmtiError GetClassFields(jclass klass,
+            jint* field_count_ptr,
+            jfieldID** fields_ptr) {
+    return functions->GetClassFields(this, klass, field_count_ptr, fields_ptr);
+  }
+
+  jvmtiError GetImplementedInterfaces(jclass klass,
+            jint* interface_count_ptr,
+            jclass** interfaces_ptr) {
+    return functions->GetImplementedInterfaces(this, klass, interface_count_ptr, interfaces_ptr);
+  }
+
+  jvmtiError GetClassVersionNumbers(jclass klass,
+            jint* minor_version_ptr,
+            jint* major_version_ptr) {
+    return functions->GetClassVersionNumbers(this, klass, minor_version_ptr, major_version_ptr);
+  }
+
+  jvmtiError GetConstantPool(jclass klass,
+            jint* constant_pool_count_ptr,
+            jint* constant_pool_byte_count_ptr,
+            unsigned char** constant_pool_bytes_ptr) {
+    return functions->GetConstantPool(this, klass, constant_pool_count_ptr, constant_pool_byte_count_ptr, constant_pool_bytes_ptr);
+  }
+
+  jvmtiError IsInterface(jclass klass,
+            jboolean* is_interface_ptr) {
+    return functions->IsInterface(this, klass, is_interface_ptr);
+  }
+
+  jvmtiError IsArrayClass(jclass klass,
+            jboolean* is_array_class_ptr) {
+    return functions->IsArrayClass(this, klass, is_array_class_ptr);
+  }
+
+  jvmtiError IsModifiableClass(jclass klass,
+            jboolean* is_modifiable_class_ptr) {
+    return functions->IsModifiableClass(this, klass, is_modifiable_class_ptr);
+  }
+
+  jvmtiError GetClassLoader(jclass klass,
+            jobject* classloader_ptr) {
+    return functions->GetClassLoader(this, klass, classloader_ptr);
+  }
+
+  jvmtiError GetSourceDebugExtension(jclass klass,
+            char** source_debug_extension_ptr) {
+    return functions->GetSourceDebugExtension(this, klass, source_debug_extension_ptr);
+  }
+
+  jvmtiError RetransformClasses(jint class_count,
+            const jclass* classes) {
+    return functions->RetransformClasses(this, class_count, classes);
+  }
+
+  jvmtiError RedefineClasses(jint class_count,
+            const jvmtiClassDefinition* class_definitions) {
+    return functions->RedefineClasses(this, class_count, class_definitions);
+  }
+
+  jvmtiError GetObjectSize(jobject object,
+            jlong* size_ptr) {
+    return functions->GetObjectSize(this, object, size_ptr);
+  }
+
+  jvmtiError GetObjectHashCode(jobject object,
+            jint* hash_code_ptr) {
+    return functions->GetObjectHashCode(this, object, hash_code_ptr);
+  }
+
+  jvmtiError GetObjectMonitorUsage(jobject object,
+            jvmtiMonitorUsage* info_ptr) {
+    return functions->GetObjectMonitorUsage(this, object, info_ptr);
+  }
+
+  jvmtiError GetFieldName(jclass klass,
+            jfieldID field,
+            char** name_ptr,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetFieldName(this, klass, field, name_ptr, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetFieldDeclaringClass(jclass klass,
+            jfieldID field,
+            jclass* declaring_class_ptr) {
+    return functions->GetFieldDeclaringClass(this, klass, field, declaring_class_ptr);
+  }
+
+  jvmtiError GetFieldModifiers(jclass klass,
+            jfieldID field,
+            jint* modifiers_ptr) {
+    return functions->GetFieldModifiers(this, klass, field, modifiers_ptr);
+  }
+
+  jvmtiError IsFieldSynthetic(jclass klass,
+            jfieldID field,
+            jboolean* is_synthetic_ptr) {
+    return functions->IsFieldSynthetic(this, klass, field, is_synthetic_ptr);
+  }
+
+  jvmtiError GetMethodName(jmethodID method,
+            char** name_ptr,
+            char** signature_ptr,
+            char** generic_ptr) {
+    return functions->GetMethodName(this, method, name_ptr, signature_ptr, generic_ptr);
+  }
+
+  jvmtiError GetMethodDeclaringClass(jmethodID method,
+            jclass* declaring_class_ptr) {
+    return functions->GetMethodDeclaringClass(this, method, declaring_class_ptr);
+  }
+
+  jvmtiError GetMethodModifiers(jmethodID method,
+            jint* modifiers_ptr) {
+    return functions->GetMethodModifiers(this, method, modifiers_ptr);
+  }
+
+  jvmtiError GetMaxLocals(jmethodID method,
+            jint* max_ptr) {
+    return functions->GetMaxLocals(this, method, max_ptr);
+  }
+
+  jvmtiError GetArgumentsSize(jmethodID method,
+            jint* size_ptr) {
+    return functions->GetArgumentsSize(this, method, size_ptr);
+  }
+
+  jvmtiError GetLineNumberTable(jmethodID method,
+            jint* entry_count_ptr,
+            jvmtiLineNumberEntry** table_ptr) {
+    return functions->GetLineNumberTable(this, method, entry_count_ptr, table_ptr);
+  }
+
+  jvmtiError GetMethodLocation(jmethodID method,
+            jlocation* start_location_ptr,
+            jlocation* end_location_ptr) {
+    return functions->GetMethodLocation(this, method, start_location_ptr, end_location_ptr);
+  }
+
+  jvmtiError GetLocalVariableTable(jmethodID method,
+            jint* entry_count_ptr,
+            jvmtiLocalVariableEntry** table_ptr) {
+    return functions->GetLocalVariableTable(this, method, entry_count_ptr, table_ptr);
+  }
+
+  jvmtiError GetBytecodes(jmethodID method,
+            jint* bytecode_count_ptr,
+            unsigned char** bytecodes_ptr) {
+    return functions->GetBytecodes(this, method, bytecode_count_ptr, bytecodes_ptr);
+  }
+
+  jvmtiError IsMethodNative(jmethodID method,
+            jboolean* is_native_ptr) {
+    return functions->IsMethodNative(this, method, is_native_ptr);
+  }
+
+  jvmtiError IsMethodSynthetic(jmethodID method,
+            jboolean* is_synthetic_ptr) {
+    return functions->IsMethodSynthetic(this, method, is_synthetic_ptr);
+  }
+
+  jvmtiError IsMethodObsolete(jmethodID method,
+            jboolean* is_obsolete_ptr) {
+    return functions->IsMethodObsolete(this, method, is_obsolete_ptr);
+  }
+
+  jvmtiError SetNativeMethodPrefix(const char* prefix) {
+    return functions->SetNativeMethodPrefix(this, prefix);
+  }
+
+  jvmtiError SetNativeMethodPrefixes(jint prefix_count,
+            char** prefixes) {
+    return functions->SetNativeMethodPrefixes(this, prefix_count, prefixes);
+  }
+
+  jvmtiError CreateRawMonitor(const char* name,
+            jrawMonitorID* monitor_ptr) {
+    return functions->CreateRawMonitor(this, name, monitor_ptr);
+  }
+
+  jvmtiError DestroyRawMonitor(jrawMonitorID monitor) {
+    return functions->DestroyRawMonitor(this, monitor);
+  }
+
+  jvmtiError RawMonitorEnter(jrawMonitorID monitor) {
+    return functions->RawMonitorEnter(this, monitor);
+  }
+
+  jvmtiError RawMonitorExit(jrawMonitorID monitor) {
+    return functions->RawMonitorExit(this, monitor);
+  }
+
+  jvmtiError RawMonitorWait(jrawMonitorID monitor,
+            jlong millis) {
+    return functions->RawMonitorWait(this, monitor, millis);
+  }
+
+  jvmtiError RawMonitorNotify(jrawMonitorID monitor) {
+    return functions->RawMonitorNotify(this, monitor);
+  }
+
+  jvmtiError RawMonitorNotifyAll(jrawMonitorID monitor) {
+    return functions->RawMonitorNotifyAll(this, monitor);
+  }
+
+  jvmtiError SetJNIFunctionTable(const jniNativeInterface* function_table) {
+    return functions->SetJNIFunctionTable(this, function_table);
+  }
+
+  jvmtiError GetJNIFunctionTable(jniNativeInterface** function_table) {
+    return functions->GetJNIFunctionTable(this, function_table);
+  }
+
+  jvmtiError SetEventCallbacks(const jvmtiEventCallbacks* callbacks,
+            jint size_of_callbacks) {
+    return functions->SetEventCallbacks(this, callbacks, size_of_callbacks);
+  }
+
+  jvmtiError SetEventNotificationMode(jvmtiEventMode mode,
+            jvmtiEvent event_type,
+            jthread event_thread,
+             ...) {
+    return functions->SetEventNotificationMode(this, mode, event_type, event_thread);
+  }
+
+  jvmtiError GenerateEvents(jvmtiEvent event_type) {
+    return functions->GenerateEvents(this, event_type);
+  }
+
+  jvmtiError GetExtensionFunctions(jint* extension_count_ptr,
+            jvmtiExtensionFunctionInfo** extensions) {
+    return functions->GetExtensionFunctions(this, extension_count_ptr, extensions);
+  }
+
+  jvmtiError GetExtensionEvents(jint* extension_count_ptr,
+            jvmtiExtensionEventInfo** extensions) {
+    return functions->GetExtensionEvents(this, extension_count_ptr, extensions);
+  }
+
+  jvmtiError SetExtensionEventCallback(jint extension_event_index,
+            jvmtiExtensionEvent callback) {
+    return functions->SetExtensionEventCallback(this, extension_event_index, callback);
+  }
+
+  jvmtiError GetPotentialCapabilities(jvmtiCapabilities* capabilities_ptr) {
+    return functions->GetPotentialCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError AddCapabilities(const jvmtiCapabilities* capabilities_ptr) {
+    return functions->AddCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError RelinquishCapabilities(const jvmtiCapabilities* capabilities_ptr) {
+    return functions->RelinquishCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError GetCapabilities(jvmtiCapabilities* capabilities_ptr) {
+    return functions->GetCapabilities(this, capabilities_ptr);
+  }
+
+  jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetCurrentThreadCpuTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetCurrentThreadCpuTime(jlong* nanos_ptr) {
+    return functions->GetCurrentThreadCpuTime(this, nanos_ptr);
+  }
+
+  jvmtiError GetThreadCpuTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetThreadCpuTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetThreadCpuTime(jthread thread,
+            jlong* nanos_ptr) {
+    return functions->GetThreadCpuTime(this, thread, nanos_ptr);
+  }
+
+  jvmtiError GetTimerInfo(jvmtiTimerInfo* info_ptr) {
+    return functions->GetTimerInfo(this, info_ptr);
+  }
+
+  jvmtiError GetTime(jlong* nanos_ptr) {
+    return functions->GetTime(this, nanos_ptr);
+  }
+
+  jvmtiError GetAvailableProcessors(jint* processor_count_ptr) {
+    return functions->GetAvailableProcessors(this, processor_count_ptr);
+  }
+
+  jvmtiError AddToBootstrapClassLoaderSearch(const char* segment) {
+    return functions->AddToBootstrapClassLoaderSearch(this, segment);
+  }
+
+  jvmtiError AddToSystemClassLoaderSearch(const char* segment) {
+    return functions->AddToSystemClassLoaderSearch(this, segment);
+  }
+
+  jvmtiError GetSystemProperties(jint* count_ptr,
+            char*** property_ptr) {
+    return functions->GetSystemProperties(this, count_ptr, property_ptr);
+  }
+
+  jvmtiError GetSystemProperty(const char* property,
+            char** value_ptr) {
+    return functions->GetSystemProperty(this, property, value_ptr);
+  }
+
+  jvmtiError SetSystemProperty(const char* property,
+            const char* value) {
+    return functions->SetSystemProperty(this, property, value);
+  }
+
+  jvmtiError GetPhase(jvmtiPhase* phase_ptr) {
+    return functions->GetPhase(this, phase_ptr);
+  }
+
+  jvmtiError DisposeEnvironment() {
+    return functions->DisposeEnvironment(this);
+  }
+
+  jvmtiError SetEnvironmentLocalStorage(const void* data) {
+    return functions->SetEnvironmentLocalStorage(this, data);
+  }
+
+  jvmtiError GetEnvironmentLocalStorage(void** data_ptr) {
+    return functions->GetEnvironmentLocalStorage(this, data_ptr);
+  }
+
+  jvmtiError GetVersionNumber(jint* version_ptr) {
+    return functions->GetVersionNumber(this, version_ptr);
+  }
+
+  jvmtiError GetErrorName(jvmtiError error,
+            char** name_ptr) {
+    return functions->GetErrorName(this, error, name_ptr);
+  }
+
+  jvmtiError SetVerboseFlag(jvmtiVerboseFlag flag,
+            jboolean value) {
+    return functions->SetVerboseFlag(this, flag, value);
+  }
+
+  jvmtiError GetJLocationFormat(jvmtiJlocationFormat* format_ptr) {
+    return functions->GetJLocationFormat(this, format_ptr);
+  }
+
+#endif /* __cplusplus */
+};
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* !_JAVA_JVMTI_H_ */
+
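
The header above gives each jvmtiEnv a set of C++ convenience wrappers that simply forward to the underlying function table. As a rough illustration of how an agent built against this header might use them, here is a minimal, hypothetical Agent_OnLoad; the requested JVMTI version, the chosen event, and the agent itself are assumptions for illustration, not something this change prescribes.

// Hypothetical minimal agent using the C++ wrappers above (assumes this jvmti.h is on the
// include path and that the runtime accepts JVMTI_VERSION_1_2; neither is guaranteed here).
#include <string.h>
#include <jni.h>
#include <jvmti.h>

static void JNICALL OnVMInit(jvmtiEnv* jvmti, JNIEnv*, jthread) {
  jint version = 0;
  jvmti->GetVersionNumber(&version);  // Forwards to functions->GetVersionNumber(this, &version).
}

extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char*, void*) {
  jvmtiEnv* jvmti = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2) != JNI_OK) {
    return JNI_ERR;
  }
  jvmtiEventCallbacks callbacks;
  memset(&callbacks, 0, sizeof(callbacks));
  callbacks.VMInit = &OnVMInit;
  // Both calls go through the wrapper methods declared in the struct above.
  jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));
  jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_INIT, nullptr);
  return JNI_OK;
}
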
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index f45e9f6..1db09b4 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -53,8 +53,9 @@
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != nullptr);
-  std::unique_ptr<File> file(new File);
-  if (!file->Open(name, flags, 0666)) {
+  bool read_only = ((flags & O_ACCMODE) == O_RDONLY);
+  std::unique_ptr<File> file(new File(name, flags, 0666, !read_only));
+  if (!file->IsOpened()) {
     return nullptr;
   }
   return file.release();
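
The rewritten open path derives read-only from the access-mode bits rather than testing O_RDONLY directly; since O_RDONLY is 0 on Linux, a plain bitwise test would never fire, so the O_ACCMODE mask is required. A small stand-alone sketch of the same check (not ART code):

#include <fcntl.h>
#include <cassert>

// Mirrors the check above: extract the access mode with O_ACCMODE before comparing,
// because O_RDONLY is 0 and `flags & O_RDONLY` is therefore always false.
static bool IsReadOnly(int flags) {
  return (flags & O_ACCMODE) == O_RDONLY;
}

int main() {
  assert(IsReadOnly(O_RDONLY | O_CLOEXEC));
  assert(!IsReadOnly(O_RDWR | O_CREAT));
  return 0;
}
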
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index b25a1bb..4f70b04 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -23,6 +23,7 @@
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
+#include "ti/agent.h"
 #include "trace.h"
 #include "utils.h"
 
@@ -90,6 +91,13 @@
       .Define({"-Xrunjdwp:_", "-agentlib:jdwp=_"})
           .WithType<JDWP::JdwpOptions>()
           .IntoKey(M::JdwpOptions)
+      // TODO Re-enable -agentlib: once I have a good way to transform the values.
+      // .Define("-agentlib:_")
+      //     .WithType<std::vector<ti::Agent>>().AppendValues()
+      //     .IntoKey(M::AgentLib)
+      .Define("-agentpath:_")
+          .WithType<std::vector<ti::Agent>>().AppendValues()
+          .IntoKey(M::AgentPath)
       .Define("-Xms_")
           .WithType<MemoryKiB>()
           .IntoKey(M::MemoryInitialSize)
@@ -176,8 +184,13 @@
           .WithType<unsigned int>()
           .IntoKey(M::JITInvokeTransitionWeight)
       .Define("-Xjitsaveprofilinginfo")
-          .WithValue(true)
-          .IntoKey(M::JITSaveProfilingInfo)
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)
+      .Define("-Xps-_")  // profile saver options -Xps-<key>:<value>
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)  // NOTE: Appends into same key as -Xjitsaveprofilinginfo
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -244,14 +257,6 @@
                          {"wallclock",      TraceClockSource::kWall},
                          {"dualclock",      TraceClockSource::kDual}})
           .IntoKey(M::ProfileClock)
-      .Define("-Xenable-profiler")
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xprofile-*
-      .Define("-Xprofile-_")  // -Xprofile-<key>:<value>
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xenable-profiler
       .Define("-Xcompiler:_")
           .WithType<std::string>()
           .IntoKey(M::Compiler)
@@ -292,9 +297,9 @@
           .IntoKey(M::Experimental)
       .Define("-Xforce-nb-testing")
           .IntoKey(M::ForceNativeBridge)
-      .Define("-XOatFileManagerCompilerFilter:_")
-          .WithType<std::string>()
-          .IntoKey(M::OatFileManagerCompilerFilter)
+      .Define("-Xplugin:_")
+          .WithType<std::vector<Plugin>>().AppendValues()
+          .IntoKey(M::Plugins)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -493,7 +498,7 @@
   args.SetIfMissing(M::ParallelGCThreads, gc::Heap::kDefaultEnableParallelGC ?
       static_cast<unsigned int>(sysconf(_SC_NPROCESSORS_CONF) - 1u) : 0u);
 
-  // -Xverbose:
+  // -verbose:
   {
     LogVerbosity *log_verbosity = args.Get(M::Verbose);
     if (log_verbosity != nullptr) {
@@ -589,10 +594,40 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
-  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kLambdas) {
-    LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
-                 << "an unstable specification and are nearly guaranteed to change over time. "
-                 << "Do not attempt to write shipping code against these opcodes.";
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kRuntimePlugins) {
+    LOG(WARNING) << "Experimental runtime plugin support has been enabled. No guarantees are made "
+                 << "about stability or usage of this plugin support. Use at your own risk. Do "
+                 << "not attempt to write shipping code that relies on the implementation of "
+                 << "runtime plugins.";
+  } else if (!args.GetOrDefault(M::Plugins).empty()) {
+    LOG(WARNING) << "Experimental runtime plugin support has not been enabled. Ignored options: ";
+    for (auto& op : args.GetOrDefault(M::Plugins)) {
+      LOG(WARNING) << "    -plugin:" << op.GetLibrary();
+    }
+  }
+
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kAgents) {
+    LOG(WARNING) << "Experimental runtime agent support has been enabled. No guarantees are made "
+                 << "the completeness, accuracy, reliability, or stability of the agent "
+                 << "implementation. Use at your own risk. Do not attempt to write shipping code "
+                 << "that relies on the implementation of any part of this api.";
+  } else if (!args.GetOrDefault(M::AgentLib).empty() || !args.GetOrDefault(M::AgentPath).empty()) {
+    LOG(WARNING) << "agent support has not been enabled. Enable experimental agent "
+                 << " support with '-XExperimental:agent'. Ignored options are:";
+    for (auto op : args.GetOrDefault(M::AgentLib)) {
+      if (op.HasArgs()) {
+        LOG(WARNING) << "    -agentlib:" << op.GetName() << "=" << op.GetArgs();
+      } else {
+        LOG(WARNING) << "    -agentlib:" << op.GetName();
+      }
+    }
+    for (auto op : args.GetOrDefault(M::AgentPath)) {
+      if (op.HasArgs()) {
+        LOG(WARNING) << "    -agentpath:" << op.GetName() << "=" << op.GetArgs();
+      } else {
+        LOG(WARNING) << "    -agentpath:" << op.GetName();
+      }
+    }
   }
 
   *runtime_options = std::move(args);
@@ -639,6 +674,11 @@
   UsageMessage(stream, "  -showversion\n");
   UsageMessage(stream, "  -help\n");
   UsageMessage(stream, "  -agentlib:jdwp=options\n");
+  // TODO add back in once -agentlib actually does something.
+  // UsageMessage(stream, "  -agentlib:library=options (Experimental feature, "
+  //                      "requires -Xexperimental:agent, some features might not be supported)\n");
+  UsageMessage(stream, "  -agentpath:library_path=options (Experimental feature, "
+                       "requires -Xexperimental:agent, some features might not be supported)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following extended options are supported:\n");
@@ -693,17 +733,13 @@
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
-  UsageMessage(stream, "  -Xenable-profiler\n");
-  UsageMessage(stream, "  -Xprofile-filename:filename\n");
-  UsageMessage(stream, "  -Xprofile-period:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-start-immediately\n");
-  UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-type:{method,stack}\n");
-  UsageMessage(stream, "  -Xprofile-max-stack-depth:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-save-period-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-save-resolved-classes-delay-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-startup-method-samples:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-methods-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-classes-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-notification-before-wake:integervalue\n");
+  UsageMessage(stream, "  -Xps-max-notification-before-wake:integervalue\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
@@ -719,8 +755,12 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
-  UsageMessage(stream, "  -Xexperimental:lambdas "
-                       "(Enable new and experimental dalvik opcodes and semantics)\n");
+  UsageMessage(stream, "  -Xplugin:<library.so> "
+                       "(Load a runtime plugin, requires -Xexperimental:runtime-plugins)\n");
+  UsageMessage(stream, "  -Xexperimental:runtime-plugins"
+                       "(Enable new and experimental agent support)\n");
+  UsageMessage(stream, "  -Xexperimental:agents"
+                       "(Enable new and experimental agent support)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 5974fb6..1f5beb9 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -26,7 +26,7 @@
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "arch/instruction_set.h"
-#include "profiler_options.h"
+#include "jit/profile_saver_options.h"
 #include "runtime_options.h"
 
 namespace art {
diff --git a/runtime/plugin.cc b/runtime/plugin.cc
new file mode 100644
index 0000000..481b1ca
--- /dev/null
+++ b/runtime/plugin.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "plugin.h"
+
+#include <dlfcn.h>
+#include "base/stringprintf.h"
+#include "base/logging.h"
+
+namespace art {
+
+const char* PLUGIN_INITIALIZATION_FUNCTION_NAME = "ArtPlugin_Initialize";
+const char* PLUGIN_DEINITIALIZATION_FUNCTION_NAME = "ArtPlugin_Deinitialize";
+
+Plugin::Plugin(const Plugin& other) : library_(other.library_), dlopen_handle_(nullptr) {
+  if (other.IsLoaded()) {
+    std::string err;
+    Load(&err);
+  }
+}
+
+bool Plugin::Load(/*out*/std::string* error_msg) {
+  DCHECK(!IsLoaded());
+  void* res = dlopen(library_.c_str(), RTLD_LAZY);
+  if (res == nullptr) {
+    *error_msg = StringPrintf("dlopen failed: %s", dlerror());
+    return false;
+  }
+  // Get the initializer function
+  PluginInitializationFunction init = reinterpret_cast<PluginInitializationFunction>(
+      dlsym(res, PLUGIN_INITIALIZATION_FUNCTION_NAME));
+  if (init != nullptr) {
+    if (!init()) {
+      dlclose(res);
+      *error_msg = StringPrintf("Initialization of plugin failed");
+      return false;
+    }
+  } else {
+    LOG(WARNING) << this << " does not include an initialization function";
+  }
+  dlopen_handle_ = res;
+  return true;
+}
+
+bool Plugin::Unload() {
+  DCHECK(IsLoaded());
+  bool ret = true;
+  void* handle = dlopen_handle_;
+  PluginDeinitializationFunction deinit = reinterpret_cast<PluginDeinitializationFunction>(
+      dlsym(handle, PLUGIN_DEINITIALIZATION_FUNCTION_NAME));
+  if (deinit != nullptr) {
+    if (!deinit()) {
+      LOG(WARNING) << this << " failed deinitialization";
+      ret = false;
+    }
+  } else {
+    LOG(WARNING) << this << " does not include a deinitialization function";
+  }
+  dlopen_handle_ = nullptr;
+  if (dlclose(handle) != 0) {
+    LOG(ERROR) << this << " failed to dlclose: " << dlerror();
+    ret = false;
+  }
+  return ret;
+}
+
+std::ostream& operator<<(std::ostream &os, const Plugin* m) {
+  return os << *m;
+}
+
+std::ostream& operator<<(std::ostream &os, Plugin const& m) {
+  return os << "Plugin { library=\"" << m.library_ << "\", handle=" << m.dlopen_handle_ << " }";
+}
+
+}  // namespace art
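
From a plugin author's point of view, the contract implemented above only requires two exported C symbols. A minimal, hypothetical plugin (the library name and log messages are made up) could look like the sketch below; it would be loaded with -Xplugin:libmyplugin.so together with -Xexperimental:runtime-plugins.

// Hypothetical libmyplugin.so. Plugin::Load()/Unload() only look for the two symbols below,
// each returning bool: false from ArtPlugin_Initialize aborts loading with an error, and
// false from ArtPlugin_Deinitialize is reported as a warning before dlclose().
#include <cstdio>

extern "C" bool ArtPlugin_Initialize() {
  // Called once the runtime is initialized; do one-time setup here.
  std::fprintf(stderr, "myplugin: initialized\n");
  return true;
}

extern "C" bool ArtPlugin_Deinitialize() {
  // Called from Plugin::Unload() before the library handle is closed.
  std::fprintf(stderr, "myplugin: deinitialized\n");
  return true;
}
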
diff --git a/runtime/plugin.h b/runtime/plugin.h
new file mode 100644
index 0000000..18f3977
--- /dev/null
+++ b/runtime/plugin.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_PLUGIN_H_
+#define ART_RUNTIME_PLUGIN_H_
+
+#include <string>
+#include "base/logging.h"
+
+namespace art {
+
+// This function is loaded from the plugin (if present) and called during runtime initialization.
+// By the time this has been called, the runtime has been fully initialized but no other native
+// libraries have been loaded yet. Failure to initialize is considered a fatal error.
+// TODO might want to give initialization function some arguments
+using PluginInitializationFunction = bool (*)();
+using PluginDeinitializationFunction = bool (*)();
+
+// A class encapsulating a plugin. There is no stable plugin ABI or API and likely never will be.
+// TODO: Consider adding locking, but at the moment plugins are only loaded during initialization
+// in a single-threaded fashion, so there is little need for it.
+class Plugin {
+ public:
+  static Plugin Create(std::string lib) {
+    return Plugin(lib);
+  }
+
+  bool IsLoaded() const {
+    return dlopen_handle_ != nullptr;
+  }
+
+  const std::string& GetLibrary() const {
+    return library_;
+  }
+
+  bool Load(/*out*/std::string* error_msg);
+  bool Unload();
+
+
+  ~Plugin() {
+    if (IsLoaded() && !Unload()) {
+      LOG(ERROR) << "Error unloading " << this;
+    }
+  }
+
+  Plugin(const Plugin& other);
+
+  // Create move constructor for putting this in a list
+  Plugin(Plugin&& other)
+      : library_(other.library_),
+        dlopen_handle_(other.dlopen_handle_) {
+    other.dlopen_handle_ = nullptr;
+  }
+
+ private:
+  explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { }
+
+  std::string library_;
+  void* dlopen_handle_;
+
+  friend std::ostream& operator<<(std::ostream &os, Plugin const& m);
+};
+
+std::ostream& operator<<(std::ostream &os, Plugin const& m);
+std::ostream& operator<<(std::ostream &os, const Plugin* m);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_PLUGIN_H_
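
On the runtime side, a Plugin is meant to be created, stored in a container (hence the move constructor), loaded once, and unloaded automatically by its destructor. A rough usage sketch under those assumptions; the surrounding function is invented for illustration.

#include <string>
#include <vector>

#include "plugin.h"

// Illustrative only: loads each requested library and reports the first failure.
bool LoadRequestedPlugins(const std::vector<std::string>& libraries, std::string* error_msg) {
  std::vector<art::Plugin> plugins;
  plugins.reserve(libraries.size());  // Avoid reallocation copies, which would dlopen again.
  for (const std::string& lib : libraries) {
    plugins.push_back(art::Plugin::Create(lib));  // Uses the move constructor for list storage.
    if (!plugins.back().Load(error_msg)) {
      return false;  // error_msg now holds the dlopen or ArtPlugin_Initialize failure.
    }
  }
  // When `plugins` goes out of scope, ~Plugin calls Unload() for anything still loaded.
  return true;
}
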
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index eb226d4..c2b34c8 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class PrebuiltToolsTest : public CommonRuntimeTest {
 };
@@ -61,6 +61,6 @@
   }
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
deleted file mode 100644
index 6a77a9e..0000000
--- a/runtime/profiler.cc
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "profiler.h"
-
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/uio.h>
-
-#include <fstream>
-
-#include "art_method-inl.h"
-#include "base/stl_util.h"
-#include "base/time_utils.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker.h"
-#include "common_throws.h"
-#include "dex_file-inl.h"
-#include "instrumentation.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "os.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
-#include "thread.h"
-#include "thread_list.h"
-#include "utils.h"
-
-#include "entrypoints/quick/quick_entrypoints.h"
-
-namespace art {
-
-BackgroundMethodSamplingProfiler* BackgroundMethodSamplingProfiler::profiler_ = nullptr;
-pthread_t BackgroundMethodSamplingProfiler::profiler_pthread_ = 0U;
-volatile bool BackgroundMethodSamplingProfiler::shutting_down_ = false;
-
-// TODO: this profiler runs regardless of the state of the machine.  Maybe we should use the
-// wakelock or something to modify the run characteristics.  This can be done when we
-// have some performance data after it's been used for a while.
-
-// Walk through the method within depth of max_depth_ on the Java stack
-class BoundedStackVisitor : public StackVisitor {
- public:
-  BoundedStackVisitor(std::vector<std::pair<ArtMethod*, uint32_t>>* stack,
-                      Thread* thread,
-                      uint32_t max_depth)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        stack_(stack),
-        max_depth_(max_depth),
-        depth_(0) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    if (m->IsRuntimeMethod()) {
-      return true;
-    }
-    uint32_t dex_pc_ = GetDexPc();
-    stack_->push_back(std::make_pair(m, dex_pc_));
-    ++depth_;
-    if (depth_ < max_depth_) {
-      return true;
-    } else {
-      return false;
-    }
-  }
-
- private:
-  std::vector<std::pair<ArtMethod*, uint32_t>>* const stack_;
-  const uint32_t max_depth_;
-  uint32_t depth_;
-
-  DISALLOW_COPY_AND_ASSIGN(BoundedStackVisitor);
-};
-
-// This is called from either a thread list traversal or from a checkpoint.  Regardless
-// of which caller, the mutator lock must be held.
-static void GetSample(Thread* thread, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  const ProfilerOptions profile_options = profiler->GetProfilerOptions();
-  switch (profile_options.GetProfileType()) {
-    case kProfilerMethod: {
-      ArtMethod* method = thread->GetCurrentMethod(nullptr);
-      if ((false) && method == nullptr) {
-        LOG(INFO) << "No current method available";
-        std::ostringstream os;
-        thread->Dump(os);
-        std::string data(os.str());
-        LOG(INFO) << data;
-      }
-      profiler->RecordMethod(method);
-      break;
-    }
-    case kProfilerBoundedStack: {
-      std::vector<InstructionLocation> stack;
-      uint32_t max_depth = profile_options.GetMaxStackDepth();
-      BoundedStackVisitor bounded_stack_visitor(&stack, thread, max_depth);
-      bounded_stack_visitor.WalkStack();
-      profiler->RecordStack(stack);
-      break;
-    }
-    default:
-      LOG(INFO) << "This profile type is not implemented.";
-  }
-}
-
-// A closure that is called by the thread checkpoint code.
-class SampleCheckpoint FINAL : public Closure {
- public:
-  explicit SampleCheckpoint(BackgroundMethodSamplingProfiler* const profiler) :
-    profiler_(profiler) {}
-
-  void Run(Thread* thread) OVERRIDE {
-    Thread* self = Thread::Current();
-    if (thread == nullptr) {
-      LOG(ERROR) << "Checkpoint with nullptr thread";
-      return;
-    }
-
-    // Grab the mutator lock (shared access).
-    ScopedObjectAccess soa(self);
-
-    // Grab a sample.
-    GetSample(thread, this->profiler_);
-
-    // And finally tell the barrier that we're done.
-    this->profiler_->GetBarrier().Pass(self);
-  }
-
- private:
-  BackgroundMethodSamplingProfiler* const profiler_;
-};
-
-bool BackgroundMethodSamplingProfiler::ShuttingDown(Thread* self) {
-  MutexLock mu(self, *Locks::profiler_lock_);
-  return shutting_down_;
-}
-
-void* BackgroundMethodSamplingProfiler::RunProfilerThread(void* arg) {
-  Runtime* runtime = Runtime::Current();
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-
-  // Add a random delay for the first time run so that we don't hammer the CPU
-  // with all profiles running at the same time.
-  const int kRandomDelayMaxSecs = 30;
-  const double kMaxBackoffSecs = 24*60*60;   // Max backoff time.
-
-  srand(MicroTime() * getpid());
-  int startup_delay = rand() % kRandomDelayMaxSecs;   // random delay for startup.
-
-
-  CHECK(runtime->AttachCurrentThread("Profiler", true, runtime->GetSystemThreadGroup(),
-                                      !runtime->IsAotCompiler()));
-
-  Thread* self = Thread::Current();
-
-  double backoff = 1.0;
-  while (true) {
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-    {
-      // wait until we need to run another profile
-      uint64_t delay_secs = profiler->options_.GetPeriodS() * backoff;
-
-      // Add a startup delay to prevent all the profiles running at once.
-      delay_secs += startup_delay;
-
-      // Immediate startup for benchmarking?
-      if (profiler->options_.GetStartImmediately() && startup_delay > 0) {
-        delay_secs = 0;
-      }
-
-      startup_delay = 0;
-
-      VLOG(profiler) << "Delaying profile start for " << delay_secs << " secs";
-      MutexLock mu(self, profiler->wait_lock_);
-      profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
-      // We were either signaled by Stop or timedout, in either case ignore the timed out result.
-
-      // Expand the backoff by its coefficient, but don't go beyond the max.
-      backoff = std::min(backoff * profiler->options_.GetBackoffCoefficient(), kMaxBackoffSecs);
-    }
-
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-
-    uint64_t start_us = MicroTime();
-    uint64_t end_us = start_us + profiler->options_.GetDurationS() * UINT64_C(1000000);
-    uint64_t now_us = start_us;
-
-    VLOG(profiler) << "Starting profiling run now for "
-                   << PrettyDuration((end_us - start_us) * 1000);
-
-    SampleCheckpoint check_point(profiler);
-
-    size_t valid_samples = 0;
-    while (now_us < end_us) {
-      if (ShuttingDown(self)) {
-        break;
-      }
-
-      usleep(profiler->options_.GetIntervalUs());    // Non-interruptible sleep.
-
-      ThreadList* thread_list = runtime->GetThreadList();
-
-      profiler->profiler_barrier_->Init(self, 0);
-      size_t barrier_count = thread_list->RunCheckpointOnRunnableThreads(&check_point);
-
-      // All threads are suspended, nothing to do.
-      if (barrier_count == 0) {
-        now_us = MicroTime();
-        continue;
-      }
-
-      valid_samples += barrier_count;
-
-      ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-
-      // Wait for the barrier to be crossed by all runnable threads.  This wait
-      // is done with a timeout so that we can detect problems with the checkpoint
-      // running code.  We should never see this.
-      const uint32_t kWaitTimeoutMs = 10000;
-
-      // Wait for all threads to pass the barrier.
-      bool timed_out =  profiler->profiler_barrier_->Increment(self, barrier_count, kWaitTimeoutMs);
-
-      // We should never get a timeout.  If we do, it suggests a problem with the checkpoint
-      // code.  Crash the process in this case.
-      CHECK(!timed_out);
-
-      // Update the current time.
-      now_us = MicroTime();
-    }
-
-    if (valid_samples > 0) {
-      // After the profile has been taken, write it out.
-      ScopedObjectAccess soa(self);   // Acquire the mutator lock.
-      uint32_t size = profiler->WriteProfile();
-      VLOG(profiler) << "Profile size: " << size;
-    }
-  }
-
-  LOG(INFO) << "Profiler shutdown";
-  runtime->DetachCurrentThread();
-  return nullptr;
-}
-
-// Write out the profile file if we are generating a profile.
-uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
-  std::string full_name = output_filename_;
-  VLOG(profiler) << "Saving profile to " << full_name;
-
-  int fd = open(full_name.c_str(), O_RDWR);
-  if (fd < 0) {
-    // Open failed.
-    LOG(ERROR) << "Failed to open profile file " << full_name;
-    return 0;
-  }
-
-  // Lock the file for exclusive access.  This will block if another process is using
-  // the file.
-  int err = flock(fd, LOCK_EX);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to lock profile file " << full_name;
-    return 0;
-  }
-
-  // Read the previous profile.
-  profile_table_.ReadPrevious(fd, options_.GetProfileType());
-
-  // Move back to the start of the file.
-  lseek(fd, 0, SEEK_SET);
-
-  // Format the profile output and write to the file.
-  std::ostringstream os;
-  uint32_t num_methods = DumpProfile(os);
-  std::string data(os.str());
-  const char *p = data.c_str();
-  size_t length = data.length();
-  size_t full_length = length;
-  do {
-    int n = ::write(fd, p, length);
-    p += n;
-    length -= n;
-  } while (length > 0);
-
-  // Truncate the file to the new length.
-  if (ftruncate(fd, full_length) == -1) {
-    LOG(ERROR) << "Failed to truncate profile file " << full_name;
-  }
-
-  // Now unlock the file, allowing another process in.
-  err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to unlock profile file " << full_name;
-  }
-
-  // Done, close the file.
-  ::close(fd);
-
-  // Clean the profile for the next time.
-  CleanProfile();
-
-  return num_methods;
-}
-
-bool BackgroundMethodSamplingProfiler::Start(
-    const std::string& output_filename, const ProfilerOptions& options) {
-  if (!options.IsEnabled()) {
-    return false;
-  }
-
-  CHECK(!output_filename.empty());
-
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    // Don't start two profiler threads.
-    if (profiler_ != nullptr) {
-      return true;
-    }
-  }
-
-  LOG(INFO) << "Starting profiler using output file: " << output_filename
-            << " and options: " << options;
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    profiler_ = new BackgroundMethodSamplingProfiler(output_filename, options);
-
-    CHECK_PTHREAD_CALL(pthread_create, (&profiler_pthread_, nullptr, &RunProfilerThread,
-        reinterpret_cast<void*>(profiler_)),
-                       "Profiler thread");
-  }
-  return true;
-}
-
-
-
-void BackgroundMethodSamplingProfiler::Stop() {
-  BackgroundMethodSamplingProfiler* profiler = nullptr;
-  pthread_t profiler_pthread = 0U;
-  {
-    MutexLock trace_mu(Thread::Current(), *Locks::profiler_lock_);
-    CHECK(!shutting_down_);
-    profiler = profiler_;
-    shutting_down_ = true;
-    profiler_pthread = profiler_pthread_;
-  }
-
-  // Now wake up the sampler thread if it sleeping.
-  {
-    MutexLock profile_mu(Thread::Current(), profiler->wait_lock_);
-    profiler->period_condition_.Signal(Thread::Current());
-  }
-  // Wait for the sample thread to stop.
-  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profiler thread shutdown");
-
-  {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    profiler_ = nullptr;
-  }
-  delete profiler;
-}
-
-
-void BackgroundMethodSamplingProfiler::Shutdown() {
-  Stop();
-}
-
-BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(
-  const std::string& output_filename, const ProfilerOptions& options)
-    : output_filename_(output_filename),
-      options_(options),
-      wait_lock_("Profile wait lock"),
-      period_condition_("Profile condition", wait_lock_),
-      profile_table_(wait_lock_),
-      profiler_barrier_(new Barrier(0)) {
-  // Populate the filtered_methods set.
-  // This is empty right now, but to add a method, do this:
-  //
-  // filtered_methods_.insert("void java.lang.Object.wait(long, int)");
-}
-
-// Filter out methods the profiler doesn't want to record.
-// We require mutator lock since some statistics will be updated here.
-bool BackgroundMethodSamplingProfiler::ProcessMethod(ArtMethod* method) {
-  if (method == nullptr) {
-    profile_table_.NullMethod();
-    // Don't record a null method.
-    return false;
-  }
-
-  mirror::Class* cls = method->GetDeclaringClass();
-  if (cls != nullptr) {
-    if (cls->GetClassLoader() == nullptr) {
-      // Don't include things in the boot
-      profile_table_.BootMethod();
-      return false;
-    }
-  }
-
-  bool is_filtered = false;
-
-  if (strcmp(method->GetName(), "<clinit>") == 0) {
-    // always filter out class init
-    is_filtered = true;
-  }
-
-  // Filter out methods by name if there are any.
-  if (!is_filtered && filtered_methods_.size() > 0) {
-    std::string method_full_name = PrettyMethod(method);
-
-    // Don't include specific filtered methods.
-    is_filtered = filtered_methods_.count(method_full_name) != 0;
-  }
-  return !is_filtered;
-}
-
-// A method has been hit, record its invocation in the method map.
-// The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(ArtMethod* method) {
-  // Add to the profile table unless it is filtered out.
-  if (ProcessMethod(method)) {
-    profile_table_.Put(method);
-  }
-}
-
-// Record the current bounded stack into sampling results.
-void BackgroundMethodSamplingProfiler::RecordStack(const std::vector<InstructionLocation>& stack) {
-  if (stack.size() == 0) {
-    return;
-  }
-  // Get the method on top of the stack. We use this method to perform filtering.
-  ArtMethod* method = stack.front().first;
-  if (ProcessMethod(method)) {
-      profile_table_.PutStack(stack);
-  }
-}
-
-// Clean out any recordings for the method traces.
-void BackgroundMethodSamplingProfiler::CleanProfile() {
-  profile_table_.Clear();
-}
-
-uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os, options_.GetProfileType());
-}
-
-// Profile Table.
-// This holds a mapping of ArtMethod* to a count of how many times a sample
-// hit it at the top of the stack.
-ProfileSampleResults::ProfileSampleResults(Mutex& lock)
-    : lock_(lock),
-      num_samples_(0U),
-      num_null_methods_(0U),
-      num_boot_methods_(0U),
-      previous_num_samples_(0U),
-      previous_num_null_methods_(0U),
-      previous_num_boot_methods_(0U) {
-  for (int i = 0; i < kHashSize; i++) {
-    table[i] = nullptr;
-  }
-  method_context_table = nullptr;
-  stack_trie_root_ = nullptr;
-}
-
-ProfileSampleResults::~ProfileSampleResults() {
-  Clear();
-}
-
-// Add a method to the profile table.  If it's the first time the method
-// has been seen, add it with count=1, otherwise increment the count.
-void ProfileSampleResults::Put(ArtMethod* method) {
-  MutexLock mu(Thread::Current(), lock_);
-  uint32_t index = Hash(method);
-  if (table[index] == nullptr) {
-    table[index] = new Map();
-  }
-  Map::iterator i = table[index]->find(method);
-  if (i == table[index]->end()) {
-    (*table[index])[method] = 1;
-  } else {
-    i->second++;
-  }
-  num_samples_++;
-}
-
-// Add a bounded stack to the profile table. Only the count of the method on
-// top of the frame will be increased.
-void ProfileSampleResults::PutStack(const std::vector<InstructionLocation>& stack) {
-  MutexLock mu(Thread::Current(), lock_);
-  ScopedObjectAccess soa(Thread::Current());
-  if (stack_trie_root_ == nullptr) {
-    // The root of the stack trie is a dummy node so that we don't have to maintain
-    // a collection of tries.
-    stack_trie_root_ = new StackTrieNode();
-  }
-
-  StackTrieNode* current = stack_trie_root_;
-  if (stack.size() == 0) {
-    current->IncreaseCount();
-    return;
-  }
-
-  for (std::vector<InstructionLocation>::const_reverse_iterator iter = stack.rbegin();
-       iter != stack.rend(); ++iter) {
-    InstructionLocation inst_loc = *iter;
-    ArtMethod* method = inst_loc.first;
-    if (method == nullptr) {
-      // skip null method
-      continue;
-    }
-    uint32_t dex_pc = inst_loc.second;
-    uint32_t method_idx = method->GetDexMethodIndex();
-    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-    MethodReference method_ref(dex_file, method_idx);
-    StackTrieNode* child = current->FindChild(method_ref, dex_pc);
-    if (child != nullptr) {
-      current = child;
-    } else {
-      uint32_t method_size = 0;
-      const DexFile::CodeItem* codeitem = method->GetCodeItem();
-      if (codeitem != nullptr) {
-        method_size = codeitem->insns_size_in_code_units_;
-      }
-      StackTrieNode* new_node = new StackTrieNode(method_ref, dex_pc, method_size, current);
-      current->AppendChild(new_node);
-      current = new_node;
-    }
-  }
-
-  if (current != stack_trie_root_ && current->GetCount() == 0) {
-    // Insert into method_context table;
-    if (method_context_table == nullptr) {
-      method_context_table = new MethodContextMap();
-    }
-    MethodReference method = current->GetMethod();
-    MethodContextMap::iterator i = method_context_table->find(method);
-    if (i == method_context_table->end()) {
-      TrieNodeSet* node_set = new TrieNodeSet();
-      node_set->insert(current);
-      (*method_context_table)[method] = node_set;
-    } else {
-      TrieNodeSet* node_set = i->second;
-      node_set->insert(current);
-    }
-  }
-  current->IncreaseCount();
-  num_samples_++;
-}
-
-// Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
-  ScopedObjectAccess soa(Thread::Current());
-  num_samples_ += previous_num_samples_;
-  num_null_methods_ += previous_num_null_methods_;
-  num_boot_methods_ += previous_num_boot_methods_;
-
-  VLOG(profiler) << "Profile: "
-                 << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
-  os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
-  uint32_t num_methods = 0;
-  if (type == kProfilerMethod) {
-    for (int i = 0 ; i < kHashSize; i++) {
-      Map *map = table[i];
-      if (map != nullptr) {
-        for (const auto &meth_iter : *map) {
-          ArtMethod *method = meth_iter.first;
-          std::string method_name = PrettyMethod(method);
-
-          const DexFile::CodeItem* codeitem = method->GetCodeItem();
-          uint32_t method_size = 0;
-          if (codeitem != nullptr) {
-            method_size = codeitem->insns_size_in_code_units_;
-          }
-          uint32_t count = meth_iter.second;
-
-          // Merge this profile entry with one from a previous run (if present).  Also
-          // remove the previous entry.
-          PreviousProfile::iterator pi = previous_.find(method_name);
-          if (pi != previous_.end()) {
-            count += pi->second.count_;
-            previous_.erase(pi);
-          }
-          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-          ++num_methods;
-        }
-      }
-    }
-  } else if (type == kProfilerBoundedStack) {
-    if (method_context_table != nullptr) {
-      for (const auto &method_iter : *method_context_table) {
-        MethodReference method = method_iter.first;
-        TrieNodeSet* node_set = method_iter.second;
-        std::string method_name = PrettyMethod(method.dex_method_index, *(method.dex_file));
-        uint32_t method_size = 0;
-        uint32_t total_count = 0;
-        PreviousContextMap new_context_map;
-        for (const auto &trie_node_i : *node_set) {
-          StackTrieNode* node = trie_node_i;
-          method_size = node->GetMethodSize();
-          uint32_t count = node->GetCount();
-          uint32_t dexpc = node->GetDexPC();
-          total_count += count;
-
-          StackTrieNode* current = node->GetParent();
-          // We go backward on the trie to retrieve context and dex_pc until the dummy root.
-          // The format of the context is "method_1@pc_1@method_2@pc_2@..."
-          std::vector<std::string> context_vector;
-          while (current != nullptr && current->GetParent() != nullptr) {
-            context_vector.push_back(StringPrintf("%s@%u",
-                PrettyMethod(current->GetMethod().dex_method_index, *(current->GetMethod().dex_file)).c_str(),
-                current->GetDexPC()));
-            current = current->GetParent();
-          }
-          std::string context_sig = Join(context_vector, '@');
-          new_context_map[std::make_pair(dexpc, context_sig)] = count;
-        }
-
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          total_count += pi->second.count_;
-          PreviousContextMap* previous_context_map = pi->second.context_map_;
-          if (previous_context_map != nullptr) {
-            for (const auto &context_i : *previous_context_map) {
-              uint32_t count = context_i.second;
-              PreviousContextMap::iterator ci = new_context_map.find(context_i.first);
-              if (ci == new_context_map.end()) {
-                new_context_map[context_i.first] = count;
-              } else {
-                ci->second += count;
-              }
-            }
-          }
-          delete previous_context_map;
-          previous_.erase(pi);
-        }
-        // We write out profile data with dex pc and context information in the following format:
-        // "method/total_count/size/[pc_1:count_1:context_1#pc_2:count_2:context_2#...]".
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : new_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
-            method_size, Join(context_count_vector, '#').c_str());
-        ++num_methods;
-      }
-    }
-  }
-
-  // Now we write out the remaining previous methods.
-  for (const auto &pi : previous_) {
-    if (type == kProfilerMethod) {
-      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-    } else if (type == kProfilerBoundedStack) {
-      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-      PreviousContextMap* previous_context_map = pi.second.context_map_;
-      if (previous_context_map != nullptr) {
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : *previous_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << Join(context_count_vector, '#');
-      }
-      os << "]\n";
-    }
-    ++num_methods;
-  }
-  return num_methods;
-}
-
-void ProfileSampleResults::Clear() {
-  num_samples_ = 0;
-  num_null_methods_ = 0;
-  num_boot_methods_ = 0;
-  for (int i = 0; i < kHashSize; i++) {
-    delete table[i];
-    table[i] = nullptr;
-  }
-  if (stack_trie_root_ != nullptr) {
-    stack_trie_root_->DeleteChildren();
-    delete stack_trie_root_;
-    stack_trie_root_ = nullptr;
-    if (method_context_table != nullptr) {
-      delete method_context_table;
-      method_context_table = nullptr;
-    }
-  }
-  for (auto &pi : previous_) {
-    if (pi.second.context_map_ != nullptr) {
-      delete pi.second.context_map_;
-      pi.second.context_map_ = nullptr;
-    }
-  }
-  previous_.clear();
-}
-
-uint32_t ProfileSampleResults::Hash(ArtMethod* method) {
-  return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
-}
-
-// Read a single line into the given string.  Returns true if everything OK, false
-// on EOF or error.
-static bool ReadProfileLine(int fd, std::string& line) {
-  char buf[4];
-  line.clear();
-  while (true) {
-    int n = read(fd, buf, 1);     // TODO: could speed this up but is it worth it?
-    if (n != 1) {
-      return false;
-    }
-    if (buf[0] == '\n') {
-      break;
-    }
-    line += buf[0];
-  }
-  return true;
-}
-
-void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
-  // Reset counters.
-  previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
-
-  std::string line;
-
-  // The first line contains summary information.
-  if (!ReadProfileLine(fd, line)) {
-    return;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/nullcount/bootcount
-    return;
-  }
-  previous_num_samples_ = strtoul(summary_info[0].c_str(), nullptr, 10);
-  previous_num_null_methods_ = strtoul(summary_info[1].c_str(), nullptr, 10);
-  previous_num_boot_methods_ = strtoul(summary_info[2].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
-  while (true) {
-    if (!ReadProfileLine(fd, line)) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      break;
-    }
-    std::string methodname = info[0];
-    uint32_t total_count = strtoul(info[1].c_str(), nullptr, 10);
-    uint32_t size = strtoul(info[2].c_str(), nullptr, 10);
-    PreviousContextMap* context_map = nullptr;
-    if (type == kProfilerBoundedStack && info.size() == 4) {
-      context_map = new PreviousContextMap();
-      std::string context_counts_str = info[3].substr(1, info[3].size() - 2);
-      std::vector<std::string> context_count_pairs;
-      Split(context_counts_str, '#', &context_count_pairs);
-      for (uint32_t i = 0; i < context_count_pairs.size(); ++i) {
-        std::vector<std::string> context_count;
-        Split(context_count_pairs[i], ':', &context_count);
-        if (context_count.size() == 2) {
-          // Handles the situtation when the profile file doesn't contain context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          (*context_map)[std::make_pair(dexpc, "")] = count;
-        } else {
-          // Handles the situtation when the profile file contains context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          std::string context = context_count[2];
-          (*context_map)[std::make_pair(dexpc, context)] = count;
-        }
-      }
-    }
-    previous_[methodname] = PreviousValue(total_count, size, context_map);
-  }
-}
-
-bool ProfileFile::LoadFile(const std::string& fileName) {
-  LOG(VERBOSE) << "reading profile file " << fileName;
-  struct stat st;
-  int err = stat(fileName.c_str(), &st);
-  if (err == -1) {
-    LOG(VERBOSE) << "not found";
-    return false;
-  }
-  if (st.st_size == 0) {
-    return false;  // Empty profiles are invalid.
-  }
-  std::ifstream in(fileName.c_str());
-  if (!in) {
-    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
-    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
-    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
-    LOG(VERBOSE) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be total/null/boot.
-    return false;
-  }
-  // This is the number of hits in all profiled methods (without null or boot methods)
-  uint32_t total_count = strtoul(summary_info[0].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      return false;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = strtoul(it->second[2].c_str(), nullptr, 10);
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
-bool ProfileFile::GetProfileData(ProfileFile::ProfileData* data, const std::string& method_name) {
-  ProfileMap::iterator i = profile_map_.find(method_name);
-  if (i == profile_map_.end()) {
-    return false;
-  }
-  *data = i->second;
-  return true;
-}
-
-bool ProfileFile::GetTopKSamples(std::set<std::string>& topKSamples, double topKPercentage) {
-  ProfileMap::iterator end = profile_map_.end();
-  for (ProfileMap::iterator it = profile_map_.begin(); it != end; it++) {
-    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
-      topKSamples.insert(it->first);
-    }
-  }
-  return true;
-}
-
-StackTrieNode* StackTrieNode::FindChild(MethodReference method, uint32_t dex_pc) {
-  if (children_.size() == 0) {
-    return nullptr;
-  }
-  // Create a dummy node for searching.
-  StackTrieNode* node = new StackTrieNode(method, dex_pc, 0, nullptr);
-  std::set<StackTrieNode*, StackTrieNodeComparator>::iterator i = children_.find(node);
-  delete node;
-  return (i == children_.end()) ? nullptr : *i;
-}
-
-void StackTrieNode::DeleteChildren() {
-  for (auto &child : children_) {
-    if (child != nullptr) {
-      child->DeleteChildren();
-      delete child;
-    }
-  }
-}
-
-}  // namespace art
diff --git a/runtime/profiler.h b/runtime/profiler.h
deleted file mode 100644
index bd29f71..0000000
--- a/runtime/profiler.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_H_
-#define ART_RUNTIME_PROFILER_H_
-
-#include <memory>
-#include <ostream>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "barrier.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "globals.h"
-#include "instrumentation.h"
-#include "profiler_options.h"
-#include "os.h"
-#include "safe_map.h"
-#include "method_reference.h"
-
-namespace art {
-
-namespace mirror {
-  class Class;
-}  // namespace mirror
-class ArtMethod;
-class Thread;
-
-typedef std::pair<ArtMethod*, uint32_t> InstructionLocation;
-
-// This class stores the sampled bounded stacks in a trie structure. A path of the trie represents
-// a particular context with the method on top of the stack being a leaf or an internal node of the
-// trie rather than the root.
-class StackTrieNode {
- public:
-  StackTrieNode(MethodReference method, uint32_t dex_pc, uint32_t method_size,
-      StackTrieNode* parent) :
-      parent_(parent), method_(method), dex_pc_(dex_pc),
-      count_(0), method_size_(method_size) {
-  }
-  StackTrieNode() : parent_(nullptr), method_(nullptr, 0),
-      dex_pc_(0), count_(0), method_size_(0) {
-  }
-  StackTrieNode* GetParent() { return parent_; }
-  MethodReference GetMethod() { return method_; }
-  uint32_t GetCount() { return count_; }
-  uint32_t GetDexPC() { return dex_pc_; }
-  uint32_t GetMethodSize() { return method_size_; }
-  void AppendChild(StackTrieNode* child) { children_.insert(child); }
-  StackTrieNode* FindChild(MethodReference method, uint32_t dex_pc);
-  void DeleteChildren();
-  void IncreaseCount() { ++count_; }
-
- private:
-  // Comparator for stack trie node.
-  struct StackTrieNodeComparator {
-    bool operator()(StackTrieNode* node1, StackTrieNode* node2) const {
-      MethodReference mr1 = node1->GetMethod();
-      MethodReference mr2 = node2->GetMethod();
-      if (mr1.dex_file == mr2.dex_file) {
-        if (mr1.dex_method_index == mr2.dex_method_index) {
-          return node1->GetDexPC() < node2->GetDexPC();
-        } else {
-          return mr1.dex_method_index < mr2.dex_method_index;
-        }
-      } else {
-        return mr1.dex_file < mr2.dex_file;
-      }
-    }
-  };
-
-  std::set<StackTrieNode*, StackTrieNodeComparator> children_;
-  StackTrieNode* parent_;
-  MethodReference method_;
-  uint32_t dex_pc_;
-  uint32_t count_;
-  uint32_t method_size_;
-};
-
-//
-// This class holds all the results for all runs of the profiler.  It also
-// counts the number of null methods (where we can't determine the method) and
-// the number of methods in the boot path (where we have already compiled the method).
-//
-// This object is an internal profiler object and uses the same locking as the profiler
-// itself.
-class ProfileSampleResults {
- public:
-  explicit ProfileSampleResults(Mutex& lock);
-  ~ProfileSampleResults();
-
-  void Put(ArtMethod* method) REQUIRES(!lock_);
-  void PutStack(const std::vector<InstructionLocation>& stack_dump) REQUIRES(!lock_);
-  uint32_t Write(std::ostream &os, ProfileDataType type);
-  void ReadPrevious(int fd, ProfileDataType type);
-  void Clear();
-  uint32_t GetNumSamples() { return num_samples_; }
-  void NullMethod() { ++num_null_methods_; }
-  void BootMethod() { ++num_boot_methods_; }
-
- private:
-  uint32_t Hash(ArtMethod* method);
-  static constexpr int kHashSize = 17;
-  Mutex& lock_;                  // Reference to the main profiler lock - we don't need two of them.
-  uint32_t num_samples_;         // Total number of samples taken.
-  uint32_t num_null_methods_;    // Number of samples where can don't know the method.
-  uint32_t num_boot_methods_;    // Number of samples in the boot path.
-
-  typedef std::map<ArtMethod*, uint32_t> Map;  // Map of method vs its count.
-  Map *table[kHashSize];
-
-  typedef std::set<StackTrieNode*> TrieNodeSet;
-  // Map of method hit by profiler vs the set of stack trie nodes for this method.
-  typedef std::map<MethodReference, TrieNodeSet*, MethodReferenceComparator> MethodContextMap;
-  MethodContextMap *method_context_table;
-  StackTrieNode* stack_trie_root_;  // Root of the trie that stores sampled stack information.
-
-  // Map from <pc, context> to counts.
-  typedef std::map<std::pair<uint32_t, std::string>, uint32_t> PreviousContextMap;
-  struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0), context_map_(nullptr) {}
-    PreviousValue(uint32_t count, uint32_t method_size, PreviousContextMap* context_map)
-      : count_(count), method_size_(method_size), context_map_(context_map) {}
-    uint32_t count_;
-    uint32_t method_size_;
-    PreviousContextMap* context_map_;
-  };
-
-  typedef std::map<std::string, PreviousValue> PreviousProfile;
-  PreviousProfile previous_;
-  uint32_t previous_num_samples_;
-  uint32_t previous_num_null_methods_;     // Number of samples where can don't know the method.
-  uint32_t previous_num_boot_methods_;     // Number of samples in the boot path.
-};
-
-//
-// The BackgroundMethodSamplingProfiler runs in a thread.  Most of the time it is sleeping but
-// occasionally wakes up and counts the number of times a method is called.  Each time
-// it ticks, it looks at the current method and records it in the ProfileSampleResults
-// table.
-//
-// The timing is controlled by a number of variables:
-// 1.  Period: the time between sampling runs.
-// 2.  Interval: the time between each sample in a run.
-// 3.  Duration: the duration of a run.
-//
-// So the profiler thread is sleeping for the 'period' time.  It wakes up and runs for the
-// 'duration'.  The run consists of a series of samples, each of which is 'interval' microseconds
-// apart.  At the end of a run, it writes the results table to a file and goes back to sleep.
-
-class BackgroundMethodSamplingProfiler {
- public:
-  // Start a profile thread with the user-supplied arguments.
-  // Returns true if the profile was started or if it was already running. Returns false otherwise.
-  static bool Start(const std::string& output_filename, const ProfilerOptions& options)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
-               !Locks::profiler_lock_);
-
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Stop() REQUIRES(!Locks::profiler_lock_, !wait_lock_, !Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Shutdown() REQUIRES(!Locks::profiler_lock_) NO_THREAD_SAFETY_ANALYSIS;
-
-  void RecordMethod(ArtMethod *method) SHARED_REQUIRES(Locks::mutator_lock_);
-  void RecordStack(const std::vector<InstructionLocation>& stack)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool ProcessMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
-  const ProfilerOptions& GetProfilerOptions() const { return options_; }
-
-  Barrier& GetBarrier() {
-    return *profiler_barrier_;
-  }
-
- private:
-  explicit BackgroundMethodSamplingProfiler(
-    const std::string& output_filename, const ProfilerOptions& options);
-
-  // The sampling interval in microseconds is passed as an argument.
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void* RunProfilerThread(void* arg) REQUIRES(!Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-
-  uint32_t WriteProfile() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void CleanProfile();
-  uint32_t DumpProfile(std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
-
-  static BackgroundMethodSamplingProfiler* profiler_ GUARDED_BY(Locks::profiler_lock_);
-
-  // We need to shut the sample thread down at exit.  Setting this to true will do that.
-  static volatile bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-
-  // Sampling thread, non-zero when sampling.
-  static pthread_t profiler_pthread_;
-
-  // Some measure of the number of samples that are significant.
-  static constexpr uint32_t kSignificantSamples = 10;
-
-  // The name of the file where profile data will be written.
-  std::string output_filename_;
-  // The options used to start the profiler.
-  const ProfilerOptions& options_;
-
-
-  // Profile condition support.
-  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
-
-  ProfileSampleResults profile_table_;
-
-  std::unique_ptr<Barrier> profiler_barrier_;
-
-  // Set of methods to be filtered out.  This will probably be rare because
-  // most of the methods we want to be filtered reside in the boot path and
-  // are automatically filtered.
-  typedef std::set<std::string> FilteredMethods;
-  FilteredMethods filtered_methods_;
-
-  DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
-};
-
-//
-// Contains profile data generated from previous runs of the program and stored
-// in a file.  It is used to determine whether to compile a particular method or not.
-class ProfileFile {
- public:
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), used_percent_(0), top_k_used_percentage_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double used_percent, double top_k_used_percentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      used_percent_(used_percent), top_k_used_percentage_(top_k_used_percentage) {
-      // TODO: currently method_size_ is unused
-      UNUSED(method_size_);
-    }
-
-    double GetUsedPercent() const { return used_percent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return top_k_used_percentage_; }
-
-   private:
-    std::string method_name_;       // Method name.
-    uint32_t count_;                // Number of times it has been called.
-    uint32_t method_size_;          // Size of the method in dex instructions.
-    double used_percent_;           // Percentage of how many times this method was called.
-    double top_k_used_percentage_;  // The percentage of the group that comprises the top K% of
-                                    // the used methods and that this method belongs to.
-  };
-
- public:
-  // Loads profile data from the given file. The new data are merged with any existing data.
-  // Returns true if the file was loaded successfully and false otherwise.
-  bool LoadFile(const std::string& filename);
-
-  // Computes the group that comprises top_k_percentage of the total used methods.
-  bool GetTopKSamples(std::set<std::string>& top_k_methods, double top_k_percentage);
-
-  // If the given method has an entry in the profile table it updates the data
-  // and returns true. Otherwise returns false and leaves the data unchanged.
-  bool GetProfileData(ProfileData* data, const std::string& method_name);
-
- private:
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<std::string, ProfileData> ProfileMap;
-  ProfileMap profile_map_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_PROFILER_H_
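The comment block in the removed header describes the sampling cadence in terms of a period between runs, an interval between samples, and a duration per run. As a rough, standalone illustration of that timing model (not the removed implementation; SampleOnce() and WriteResults() are hypothetical placeholders):

#include <chrono>
#include <thread>

// Simplified model of the cadence described above: sleep for 'period', then
// take samples every 'interval' until 'duration' has elapsed, then write out
// the results and go back to sleep.
void RunSamplingLoop(std::chrono::seconds period,
                     std::chrono::seconds duration,
                     std::chrono::microseconds interval,
                     const bool* shutting_down) {
  while (!*shutting_down) {
    std::this_thread::sleep_for(period);
    const auto run_end = std::chrono::steady_clock::now() + duration;
    while (std::chrono::steady_clock::now() < run_end) {
      // SampleOnce();  // Hypothetical: record the currently executing method.
      std::this_thread::sleep_for(interval);
    }
    // WriteResults();  // Hypothetical: dump the results table to a file.
  }
}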
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
deleted file mode 100644
index 1db2f05..0000000
--- a/runtime/profiler_options.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_OPTIONS_H_
-#define ART_RUNTIME_PROFILER_OPTIONS_H_
-
-#include <string>
-#include <ostream>
-
-namespace art {
-
-enum ProfileDataType {
-  kProfilerMethod,          // Method only
-  kProfilerBoundedStack,    // Methods with Dex PC on top of the stack
-};
-std::ostream& operator<<(std::ostream& os, const ProfileDataType& rhs);
-
-class ProfilerOptions {
- public:
-  static constexpr bool kDefaultEnabled = false;
-  static constexpr uint32_t kDefaultPeriodS = 10;
-  static constexpr uint32_t kDefaultDurationS = 20;
-  static constexpr uint32_t kDefaultIntervalUs = 500;
-  static constexpr double kDefaultBackoffCoefficient = 2.0;
-  static constexpr bool kDefaultStartImmediately = false;
-  static constexpr double kDefaultTopKThreshold = 90.0;
-  static constexpr double kDefaultChangeInTopKThreshold = 10.0;
-  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
-  static constexpr uint32_t kDefaultMaxStackDepth = 3;
-
-  ProfilerOptions() :
-    enabled_(kDefaultEnabled),
-    period_s_(kDefaultPeriodS),
-    duration_s_(kDefaultDurationS),
-    interval_us_(kDefaultIntervalUs),
-    backoff_coefficient_(kDefaultBackoffCoefficient),
-    start_immediately_(kDefaultStartImmediately),
-    top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
-    profile_type_(kDefaultProfileData),
-    max_stack_depth_(kDefaultMaxStackDepth) {}
-
-  ProfilerOptions(bool enabled,
-                 uint32_t period_s,
-                 uint32_t duration_s,
-                 uint32_t interval_us,
-                 double backoff_coefficient,
-                 bool start_immediately,
-                 double top_k_threshold,
-                 double top_k_change_threshold,
-                 ProfileDataType profile_type,
-                 uint32_t max_stack_depth):
-    enabled_(enabled),
-    period_s_(period_s),
-    duration_s_(duration_s),
-    interval_us_(interval_us),
-    backoff_coefficient_(backoff_coefficient),
-    start_immediately_(start_immediately),
-    top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold),
-    profile_type_(profile_type),
-    max_stack_depth_(max_stack_depth) {}
-
-  bool IsEnabled() const {
-    return enabled_;
-  }
-
-  uint32_t GetPeriodS() const {
-    return period_s_;
-  }
-
-  uint32_t GetDurationS() const {
-    return duration_s_;
-  }
-
-  uint32_t GetIntervalUs() const {
-    return interval_us_;
-  }
-
-  double GetBackoffCoefficient() const {
-    return backoff_coefficient_;
-  }
-
-  bool GetStartImmediately() const {
-    return start_immediately_;
-  }
-
-  double GetTopKThreshold() const {
-    return top_k_threshold_;
-  }
-
-  double GetTopKChangeThreshold() const {
-    return top_k_change_threshold_;
-  }
-
-  ProfileDataType GetProfileType() const {
-    return profile_type_;
-  }
-
-  uint32_t GetMaxStackDepth() const {
-    return max_stack_depth_;
-  }
-
- private:
-  friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
-    os << "enabled=" << po.enabled_
-       << ", period_s=" << po.period_s_
-       << ", duration_s=" << po.duration_s_
-       << ", interval_us=" << po.interval_us_
-       << ", backoff_coefficient=" << po.backoff_coefficient_
-       << ", start_immediately=" << po.start_immediately_
-       << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_
-       << ", profile_type=" << po.profile_type_
-       << ", max_stack_depth=" << po.max_stack_depth_;
-    return os;
-  }
-
-  friend class ParsedOptions;
-
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient for exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-};
-
-}  // namespace art
-
-
-#endif  // ART_RUNTIME_PROFILER_OPTIONS_H_
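For reference, the deleted options class could be exercised as sketched below, based only on the declarations shown above (default construction plus the friend streaming operator); this is illustrative, not code from the tree.

#include <iostream>

// Construct the removed ProfilerOptions with its kDefault* values and print
// it via the operator<< declared above, producing something like
// "enabled=0, period_s=10, duration_s=20, interval_us=500, ...".
void DumpDefaultProfilerOptions() {
  art::ProfilerOptions options;
  std::cout << options << std::endl;
}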
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 4d9ca6d..82e57b4 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "art_field-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
 #include "mirror/field-inl.h"
@@ -60,29 +61,31 @@
 
     jsize array_index = 0;
     // Fill the method array
+    DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
     ArtMethod* method = javaLangObject->FindDeclaredVirtualMethod(
-        "equals", "(Ljava/lang/Object;)Z", sizeof(void*));
+        "equals", "(Ljava/lang/Object;)Z", kRuntimePointerSize);
+    CHECK(method != nullptr);
+    DCHECK(!Runtime::Current()->IsActiveTransaction());
+    soa.Env()->SetObjectArrayElement(
+        proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
+    method = javaLangObject->FindDeclaredVirtualMethod("hashCode", "()I", kRuntimePointerSize);
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
-    method = javaLangObject->FindDeclaredVirtualMethod("hashCode", "()I", sizeof(void*));
-    CHECK(method != nullptr);
-    soa.Env()->SetObjectArrayElement(
-        proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
     method = javaLangObject->FindDeclaredVirtualMethod(
-        "toString", "()Ljava/lang/String;", sizeof(void*));
+        "toString", "()Ljava/lang/String;", kRuntimePointerSize);
     CHECK(method != nullptr);
     soa.Env()->SetObjectArrayElement(
         proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-            mirror::Method::CreateFromArtMethod(soa.Self(), method)));
+            mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), method)));
     // Now add all the interfaces' virtual methods.
     for (mirror::Class* interface : interfaces) {
-      for (auto& m : interface->GetDeclaredVirtualMethods(sizeof(void*))) {
+      for (auto& m : interface->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
         soa.Env()->SetObjectArrayElement(
             proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
-                mirror::Method::CreateFromArtMethod(soa.Self(), &m)));
+                mirror::Method::CreateFromArtMethod<kRuntimePointerSize, false>(soa.Self(), &m)));
       }
     }
     CHECK_EQ(array_index, methods_count);
@@ -226,14 +229,20 @@
   EXPECT_EQ(static_fields1->At(0).GetDeclaringClass(), proxyClass1.Get());
   EXPECT_EQ(static_fields1->At(1).GetDeclaringClass(), proxyClass1.Get());
 
+  ASSERT_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
+  ASSERT_FALSE(Runtime::Current()->IsActiveTransaction());
   Handle<mirror::Field> field00 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields0->At(0), true));
   Handle<mirror::Field> field01 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields0->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields0->At(1), true));
   Handle<mirror::Field> field10 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(0), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields1->At(0), true));
   Handle<mirror::Field> field11 =
-      hs.NewHandle(mirror::Field::CreateFromArtField(soa.Self(), &static_fields1->At(1), true));
+      hs.NewHandle(mirror::Field::CreateFromArtField<kRuntimePointerSize, false>(
+          soa.Self(), &static_fields1->At(1), true));
   EXPECT_EQ(field00->GetArtField(), &static_fields0->At(0));
   EXPECT_EQ(field01->GetArtField(), &static_fields0->At(1));
   EXPECT_EQ(field10->GetArtField(), &static_fields1->At(0));
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 1dea562..a6e3693 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
@@ -145,7 +146,7 @@
   DCHECK_EQ(invoke_direct->VRegC_35c(),
             method->GetCodeItem()->registers_size_ - method->GetCodeItem()->ins_size_);
   uint32_t method_index = invoke_direct->VRegB_35c();
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   ArtMethod* target_method =
       method->GetDexCache()->GetResolvedMethod(method_index, pointer_size);
   if (kIsDebugBuild && target_method != nullptr) {
@@ -214,7 +215,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(IsInstructionIPut(new_iput->Opcode()));
   uint32_t field_index = new_iput->VRegC_22c();
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   mirror::DexCache* dex_cache = method->GetDexCache();
   ArtField* field = dex_cache->GetResolvedField(field_index, pointer_size);
   if (UNLIKELY(field == nullptr)) {
@@ -732,7 +733,7 @@
     return false;
   }
   mirror::DexCache* dex_cache = method->GetDexCache();
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   ArtField* field = dex_cache->GetResolvedField(field_idx, pointer_size);
   if (field == nullptr || field->IsStatic()) {
     return false;
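Several hunks in this change replace raw size_t pointer sizes with the PointerSize type pulled in from base/enums.h (see the kRuntimePointerSize uses above). The exact definition lives in that header; the following is only a sketch of the idea, under the assumption that it is a strongly typed enum over the two supported widths:

#include <cstddef>

// Sketch only: a strongly typed pointer size in the spirit of PointerSize /
// kRuntimePointerSize. Using an enum class means callers cannot silently pass
// an arbitrary size_t (e.g. sizeof(void*) for the wrong target when cross
// compiling) where a pointer size is expected.
enum class PointerSize : size_t {
  k32 = 4,
  k64 = 8,
};

constexpr PointerSize kRuntimePointerSize =
    sizeof(void*) == 8U ? PointerSize::k64 : PointerSize::k32;

constexpr size_t ToBytes(PointerSize size) {
  return static_cast<size_t>(size);
}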
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 237fdaa..46d9e7f 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -18,6 +18,7 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -50,7 +51,8 @@
       handler_method_(nullptr),
       handler_dex_pc_(0),
       clear_exception_(false),
-      handler_frame_depth_(kInvalidFrameDepth) {}
+      handler_frame_depth_(kInvalidFrameDepth),
+      full_fragment_done_(false) {}
 
 // Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
@@ -290,7 +292,8 @@
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
         single_frame_deopt_method_(nullptr),
-        single_frame_deopt_quick_method_header_(nullptr) {
+        single_frame_deopt_quick_method_header_(nullptr),
+        callee_method_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
@@ -301,23 +304,34 @@
     return single_frame_deopt_quick_method_header_;
   }
 
+  void FinishStackWalk() SHARED_REQUIRES(Locks::mutator_lock_) {
+    // This is the upcall, or the next full frame in single-frame deopt, or the
+    // code isn't deoptimizeable. We remember the frame and last pc so that we
+    // may long jump to them.
+    exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
+    exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+    exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
+    if (!stacked_shadow_frame_pushed_) {
+      // In case there is no deoptimized shadow frame for this upcall, we still
+      // need to push a nullptr to the stack since there is always a matching pop after
+      // the long jump.
+      GetThread()->PushStackedShadowFrame(nullptr,
+                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      stacked_shadow_frame_pushed_ = true;
+    }
+    if (GetMethod() == nullptr) {
+      exception_handler_->SetFullFragmentDone(true);
+    } else {
+      CHECK(callee_method_ != nullptr) << art::PrettyMethod(GetMethod(), false);
+      exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(callee_method_));
+    }
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
     if (method == nullptr || single_frame_done_) {
-      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
-      // and last pc so that we may long jump to them.
-      exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
-      exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
-      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
-      if (!stacked_shadow_frame_pushed_) {
-        // In case there is no deoptimized shadow frame for this upcall, we still
-        // need to push a nullptr to the stack since there is always a matching pop after
-        // the long jump.
-        GetThread()->PushStackedShadowFrame(nullptr,
-                                            StackedShadowFrameType::kDeoptimizationShadowFrame);
-        stacked_shadow_frame_pushed_ = true;
-      }
+      FinishStackWalk();
       return false;  // End stack walk.
     } else if (method->IsRuntimeMethod()) {
       // Ignore callee save method.
@@ -328,7 +342,14 @@
       // the native method.
       // The top method is a runtime method, the native method comes next.
       CHECK_EQ(GetFrameDepth(), 1U);
+      callee_method_ = method;
       return true;
+    } else if (!single_frame_deopt_ &&
+               !Runtime::Current()->IsDeoptimizeable(GetCurrentQuickFramePc())) {
+      // We hit some code that's not deoptimizeable. However, single-frame deoptimization triggered
+      // from compiled code is always allowed since HDeoptimize always saves the full environment.
+      FinishStackWalk();
+      return false;  // End stack walk.
     } else {
       // Check if a shadow frame already exists for debugger's set-local-value purpose.
       const size_t frame_id = GetFrameId();
@@ -356,20 +377,17 @@
         // right before interpreter::EnterInterpreterFromDeoptimize().
         stacked_shadow_frame_pushed_ = true;
         GetThread()->PushStackedShadowFrame(
-            new_frame,
-            single_frame_deopt_
-                ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
-                : StackedShadowFrameType::kDeoptimizationShadowFrame);
+            new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
       }
       prev_shadow_frame_ = new_frame;
 
       if (single_frame_deopt_ && !IsInInlinedFrame()) {
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
-        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
         single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
+      callee_method_ = method;
       return true;
     }
   }
@@ -478,10 +496,26 @@
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
   const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
+  ArtMethod* callee_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
 
+void QuickExceptionHandler::PrepareForLongJumpToInvokeStubOrInterpreterBridge() {
+  if (full_fragment_done_) {
+    // Restore deoptimization exception. When returning from the invoke stub,
+    // ArtMethod::Invoke() will see the special exception to know deoptimization
+    // is needed.
+    self_->SetException(Thread::GetDeoptimizationException());
+  } else {
+    // PC needs to be of the quick-to-interpreter bridge.
+    int32_t offset;
+    offset = GetThreadOffset<kRuntimePointerSize>(kQuickQuickToInterpreterBridge).Int32Value();
+    handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+        reinterpret_cast<uint8_t*>(self_) + offset);
+  }
+}
+
 void QuickExceptionHandler::DeoptimizeStack() {
   DCHECK(is_deoptimization_);
   if (kDebugExceptionDelivery) {
@@ -490,9 +524,7 @@
 
   DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
-
-  // Restore deoptimization exception
-  self_->SetException(Thread::GetDeoptimizationException());
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
 void QuickExceptionHandler::DeoptimizeSingleFrame() {
@@ -518,20 +550,21 @@
         deopt_method, GetQuickToInterpreterBridge());
   }
 
-  // PC needs to be of the quick-to-interpreter bridge.
-  int32_t offset;
-  #ifdef __LP64__
-      offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
-  #else
-      offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
-  #endif
-  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(self_) + offset);
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
-void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
-  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
+void QuickExceptionHandler::DeoptimizePartialFragmentFixup(uintptr_t return_pc) {
+  // At this point, the instrumentation stack has been updated. We need to install
+  // the real return pc on the stack, in case an instrumentation stub is stored there,
+  // so that the interpreter bridge code can return to the right place.
+  if (return_pc != 0) {
+    uintptr_t* pc_addr = reinterpret_cast<uintptr_t*>(handler_quick_frame_);
+    CHECK(pc_addr != nullptr);
+    pc_addr--;
+    *reinterpret_cast<uintptr_t*>(pc_addr) = return_pc;
+  }
 
+  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
   if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
     // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
     // change how longjump works.
@@ -581,7 +614,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstrumentationStackVisitor);
 };
 
-void QuickExceptionHandler::UpdateInstrumentationStack() {
+uintptr_t QuickExceptionHandler::UpdateInstrumentationStack() {
+  uintptr_t return_pc = 0;
   if (method_tracing_active_) {
     InstrumentationStackVisitor visitor(self_, handler_frame_depth_);
     visitor.WalkStack(true);
@@ -589,9 +623,10 @@
     size_t instrumentation_frames_to_pop = visitor.GetInstrumentationFramesToPop();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     for (size_t i = 0; i < instrumentation_frames_to_pop; ++i) {
-      instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
+      return_pc = instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
     }
   }
+  return return_pc;
 }
 
 void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
@@ -611,7 +646,7 @@
 // Prints out methods with their type of frame.
 class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
  public:
-  DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+  explicit DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         show_details_(show_details) {}
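The return-pc patch in DeoptimizePartialFragmentFixup above amounts to writing the real return pc into the stack slot immediately below the handler's quick frame. A toy illustration of that pointer arithmetic, with a plain array standing in for the quick frame (nothing here touches real ART data structures):

#include <cassert>
#include <cstdint>

// The slot one below the frame pointer holds the return address, so the real
// return pc is written there when it is non-zero (mirroring the hunk above).
void PatchReturnPc(uintptr_t* handler_quick_frame, uintptr_t return_pc) {
  if (return_pc != 0) {
    uintptr_t* pc_addr = handler_quick_frame - 1;
    *pc_addr = return_pc;
  }
}

void Demo() {
  uintptr_t fake_stack[4] = {0, 0, 0, 0};
  // Pretend the handler frame starts at index 2; index 1 is its return slot.
  PatchReturnPc(&fake_stack[2], 0xdeadbeef);
  assert(fake_stack[1] == 0xdeadbeef);
}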
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index eedf83f..74b7d0d 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -46,15 +46,29 @@
   // Find the catch handler for the given exception.
   void FindCatch(mirror::Throwable* exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
-  // shadow frame that will be executed with the interpreter.
+  // Deoptimize the stack to the upcall, or to code that is not deoptimizeable. For
+  // every compiled frame, we create a "copy" shadow frame that will be executed
+  // with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Deoptimize a single frame. It's directly triggered from compiled code. It
+  // has the following properties:
+  // - It deoptimizes a single frame, which can include multiple inlined frames.
+  // - It doesn't have a return result or a pending exception at the deoptimization point.
+  // - It always deoptimizes, even if IsDeoptimizeable() returns false for the
+  //   code, since HDeoptimize always saves the full environment. So it overrides
+  //   the result of IsDeoptimizeable().
+  // - It can be either a full-fragment or a partial-fragment deoptimization, depending
+  //   on whether that single frame covers a full or a partial fragment.
   void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
-  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
-  void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Return the return pc of the last frame that's unwound.
+  uintptr_t UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Set up environment before delivering an exception to optimized code.
   void SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor)
@@ -103,8 +117,16 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  bool IsFullFragmentDone() const {
+    return full_fragment_done_;
+  }
+
+  void SetFullFragmentDone(bool full_fragment_done) {
+    full_fragment_done_ = full_fragment_done;
+  }
+
   // Walk the stack frames of the given thread, printing out non-runtime methods with their types
-  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  // of frames. Helps to verify that partial-fragment deopt really works as expected.
   static void DumpFramesWithType(Thread* self, bool details = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -131,6 +153,13 @@
   bool clear_exception_;
   // Frame depth of the catch handler or the upcall.
   size_t handler_frame_depth_;
+  // Does the handler successfully walk the full fragment (not stopped
+  // by some code that's not deoptimizeable)? Even single-frame deoptimization
+  // can set this to true if the fragment contains only one quick frame.
+  bool full_fragment_done_;
+
+  void PrepareForLongJumpToInvokeStubOrInterpreterBridge()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(QuickExceptionHandler);
 };
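The new full_fragment_done_ flag drives the choice made in PrepareForLongJumpToInvokeStubOrInterpreterBridge: a fully walked fragment resumes through the invoke stub by restoring the special deoptimization exception, while a partial fragment long-jumps straight to the quick-to-interpreter bridge. A simplified model of that decision (the enum and function names below are placeholders, not runtime API):

// Placeholder names; the two branches mirror
// PrepareForLongJumpToInvokeStubOrInterpreterBridge in the .cc hunk above.
enum class DeoptResume {
  kReturnThroughInvokeStub,    // Full fragment: ArtMethod::Invoke sees the
                               // special deoptimization exception.
  kJumpToInterpreterBridge,    // Partial fragment: long-jump to the
                               // quick-to-interpreter bridge.
};

DeoptResume ChooseResumePoint(bool full_fragment_done) {
  return full_fragment_done ? DeoptResume::kReturnThroughInvokeStub
                            : DeoptResume::kJumpToInterpreterBridge;
}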
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 0c3eb3b..92efa21 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -220,7 +220,7 @@
 }
 
 inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {
-  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
 inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index b7bd99b..5d32c09 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -37,11 +37,10 @@
 
 class ReadBarrier {
  public:
-  // TODO: disable these flags for production use.
   // Enable the to-space invariant checks.
-  static constexpr bool kEnableToSpaceInvariantChecks = true;
+  static constexpr bool kEnableToSpaceInvariantChecks = kIsDebugBuild;
   // Enable the read barrier checks.
-  static constexpr bool kEnableReadBarrierInvariantChecks = true;
+  static constexpr bool kEnableReadBarrierInvariantChecks = kIsDebugBuild;
 
   // It's up to the implementation whether the given field gets updated whereas the return value
   // must be an updated reference unless kAlwaysUpdateField is true.
@@ -100,8 +99,9 @@
   // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
   static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
   static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
+  // TODO: black_ptr_ is unused, we should remove it.
   static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x3;  // The low 2 bits for white|gray|black.
+  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bits for white|gray.
 };
 
 }  // namespace art
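With black_ptr_ effectively unused, only the low bit is needed to tell white from gray, which is what the rb_ptr_mask_ change above reflects. A small self-contained sketch of that mask (illustrative; the real read-barrier state lives in the object, not in a bare integer):

#include <cstdint>

constexpr uintptr_t kWhitePtr = 0x0;   // Not marked.
constexpr uintptr_t kGrayPtr = 0x1;    // Marked, but not marked through.
constexpr uintptr_t kRbPtrMask = 0x1;  // Only the low bit is needed now.

constexpr bool IsGray(uintptr_t rb_state) {
  return (rb_state & kRbPtrMask) == kGrayPtr;
}

static_assert(!IsGray(kWhitePtr), "white must not read as gray");
static_assert(IsGray(kGrayPtr), "gray must read as gray");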
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 28c27cd..8a531d9 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -222,7 +223,7 @@
     for (size_t i = 1, args_offset = 0; i < shorty_len_; ++i, ++args_offset) {
       mirror::Object* arg = args->Get(args_offset);
       if (((shorty_[i] == 'L') && (arg != nullptr)) || ((arg == nullptr && shorty_[i] != 'L'))) {
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* dst_class =
             m->GetClassFromTypeIndex(classes->GetTypeItem(args_offset).type_idx_,
                                      true /* resolve */,
@@ -358,7 +359,7 @@
   }
   // TODO: If args contain object references, it may cause problems.
   Thread* const self = Thread::Current();
-  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (uint32_t i = 0; i < num_params; i++) {
     uint16_t type_idx = params->GetTypeItem(i).type_idx_;
     mirror::Class* param_type = m->GetClassFromTypeIndex(type_idx,
@@ -424,7 +425,7 @@
 
 static ArtMethod* FindVirtualMethod(mirror::Object* receiver, ArtMethod* method)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(method, sizeof(void*));
+  return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(method, kRuntimePointerSize);
 }
 
 
@@ -434,7 +435,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
-    CheckMethodArguments(soa.Vm(), method->GetInterfaceMethodIfProxy(sizeof(void*)), args);
+    CheckMethodArguments(soa.Vm(), method->GetInterfaceMethodIfProxy(kRuntimePointerSize), args);
   }
   method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
@@ -458,7 +459,8 @@
   }
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
@@ -488,7 +490,8 @@
   }
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
@@ -519,7 +522,8 @@
     receiver = nullptr;
   }
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromJValues(soa, receiver, args);
@@ -550,7 +554,8 @@
     receiver = nullptr;
   }
   uint32_t shorty_len = 0;
-  const char* shorty = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(&shorty_len);
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
   JValue result;
   ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromVarArgs(soa, receiver, args);
@@ -602,13 +607,13 @@
       }
 
       // Find the actual implementation of the virtual method.
-      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m, sizeof(void*));
+      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m, kRuntimePointerSize);
     }
   }
 
   // Get our arrays of arguments and their types, and check they're the same size.
   auto* objects = soa.Decode<mirror::ObjectArray<mirror::Object>*>(javaArgs);
-  auto* np_method = m->GetInterfaceMethodIfProxy(sizeof(void*));
+  auto* np_method = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   const DexFile::TypeList* classes = np_method->GetParameterTypeList();
   uint32_t classes_size = (classes == nullptr) ? 0 : classes->Size();
   uint32_t arg_count = (objects != nullptr) ? objects->GetLength() : 0;
@@ -775,8 +780,9 @@
                                                  UnboxingFailureKind(f).c_str(),
                                                  PrettyDescriptor(dst_class).c_str()).c_str());
     } else {
-      ThrowNullPointerException(StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
-                                             PrettyDescriptor(dst_class).c_str()).c_str());
+      ThrowNullPointerException(
+          StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
+                       PrettyDescriptor(dst_class).c_str()).c_str());
     }
     return false;
   }
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index a098bf0..016f3c7 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -21,6 +21,7 @@
 #include "ScopedLocalRef.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "common_compiler_test.h"
 #include "scoped_thread_state_change.h"
 
@@ -107,8 +108,8 @@
                                                 class_loader);
     CHECK(c != nullptr);
 
-    *method = is_static ? c->FindDirectMethod(method_name, method_signature, sizeof(void*))
-                        : c->FindVirtualMethod(method_name, method_signature, sizeof(void*));
+    *method = is_static ? c->FindDirectMethod(method_name, method_signature, kRuntimePointerSize)
+                        : c->FindVirtualMethod(method_name, method_signature, kRuntimePointerSize);
     CHECK(method != nullptr);
 
     if (is_static) {
@@ -517,7 +518,9 @@
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LMain;", class_loader);
   ASSERT_TRUE(klass != nullptr);
 
-  ArtMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V", sizeof(void*));
+  ArtMethod* method = klass->FindDirectMethod("main",
+                                              "([Ljava/lang/String;)V",
+                                              kRuntimePointerSize);
   ASSERT_TRUE(method != nullptr);
 
   // Start runtime.
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index bfa8c54..3245ba0 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -41,13 +41,15 @@
   DCHECK_NE(method, GetImtConflictMethod());
   DCHECK_NE(method, GetResolutionMethod());
   // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
-  if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsAndArgs)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
+  if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsAndArgs)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAllCalleeSaves)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAllCalleeSaves);
+  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsOnly)) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsOnly);
   } else {
-    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly));
-    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
+    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything));
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything);
   }
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 66c8f87..457f203 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -55,12 +55,12 @@
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
-#include "compiler_filter.h"
 #include "debugger.h"
 #include "elf_file.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
@@ -78,7 +78,6 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
-#include "lambda/box_table.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -115,11 +114,11 @@
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
+#include "native_stack_dump.h"
 #include "oat_file.h"
 #include "oat_file_manager.h"
 #include "os.h"
 #include "parsed_options.h"
-#include "profiler.h"
 #include "jit/profile_saver.h"
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
@@ -131,6 +130,7 @@
 #include "signal_set.h"
 #include "thread.h"
 #include "thread_list.h"
+#include "ti/agent.h"
 #include "trace.h"
 #include "transaction.h"
 #include "utils.h"
@@ -282,6 +282,16 @@
     jit_->StopProfileSaver();
   }
 
+  // TODO Maybe do some locking.
+  for (auto& agent : agents_) {
+    agent.Unload();
+  }
+
+  // TODO Maybe do some locking
+  for (auto& plugin : plugins_) {
+    plugin.Unload();
+  }
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
   delete signal_catcher_;
@@ -443,7 +453,6 @@
   GetMonitorList()->SweepMonitorList(visitor);
   GetJavaVM()->SweepJniWeakGlobals(visitor);
   GetHeap()->SweepAllocationRecords(visitor);
-  GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
 bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
@@ -545,7 +554,7 @@
 
   // If a debug host build, disable ptrace restriction for debugging and test timeout thread dump.
   // Only 64-bit as prctl() may fail in 32 bit userspace on a 64-bit kernel.
-#if defined(__linux__) && !defined(__ANDROID__) && defined(__x86_64__)
+#if defined(__linux__) && !defined(ART_TARGET_ANDROID) && defined(__x86_64__)
   if (kIsDebugBuild) {
     CHECK_EQ(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), 0);
   }
@@ -629,17 +638,6 @@
   VLOG(startup) << "Runtime::Start exiting";
   finished_starting_ = true;
 
-  if (profiler_options_.IsEnabled() && !profile_output_filename_.empty()) {
-    // User has asked for a profile using -Xenable-profiler.
-    // Create the profile file if it doesn't exist.
-    int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
-    if (fd >= 0) {
-      close(fd);
-    } else if (errno != EEXIST) {
-      LOG(WARNING) << "Failed to access the profile file. Profiler disabled.";
-    }
-  }
-
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
     ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
     Trace::Start(trace_config_->trace_file.c_str(),
@@ -884,12 +882,13 @@
   for (size_t i = 0; i < dex_filenames.size(); i++) {
     const char* dex_filename = dex_filenames[i].c_str();
     const char* dex_location = dex_locations[i].c_str();
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     if (!OS::FileExists(dex_filename)) {
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, kVerifyChecksum, &error_msg, dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -972,16 +971,16 @@
   experimental_flags_ = runtime_options.GetOrDefault(Opt::Experimental);
   is_low_memory_mode_ = runtime_options.Exists(Opt::LowMemoryMode);
 
-  {
-    CompilerFilter::Filter filter;
-    std::string filter_str = runtime_options.GetOrDefault(Opt::OatFileManagerCompilerFilter);
-    if (!CompilerFilter::ParseCompilerFilter(filter_str.c_str(), &filter)) {
-      LOG(ERROR) << "Cannot parse compiler filter " << filter_str;
-      return false;
-    }
-    OatFileManager::SetCompilerFilter(filter);
+  if (experimental_flags_ & ExperimentalFlags::kRuntimePlugins) {
+    plugins_ = runtime_options.ReleaseOrDefault(Opt::Plugins);
   }
-
+  if (experimental_flags_ & ExperimentalFlags::kAgents) {
+    agents_ = runtime_options.ReleaseOrDefault(Opt::AgentPath);
+    // TODO Add back in -agentlib
+    // for (auto lib : runtime_options.ReleaseOrDefault(Opt::AgentLib)) {
+    //   agents_.push_back(lib);
+    // }
+  }
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize),
                        runtime_options.GetOrDefault(Opt::HeapGrowthLimit),
@@ -1011,6 +1010,7 @@
                        xgc_option.verify_pre_sweeping_rosalloc_,
                        xgc_option.verify_post_gc_rosalloc_,
                        xgc_option.gcstress_,
+                       xgc_option.measure_,
                        runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
 
@@ -1035,9 +1035,6 @@
     jit_options_->SetSaveProfilingInfo(false);
   }
 
-  // Allocate a global table of boxed lambda objects <-> closures.
-  lambda_box_table_ = MakeUnique<lambda::BoxTable>();
-
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
   const bool use_malloc = IsAotCompiler();
@@ -1108,6 +1105,10 @@
 
   java_vm_ = new JavaVMExt(this, runtime_options);
 
+  // Add the JniEnv handler.
+  // TODO Refactor this stuff.
+  java_vm_->AddEnvironmentHook(JNIEnvExt::GetEnvHandler);
+
   Thread::Startup();
 
   // ClassLinker needs an attached thread, but we can't fully attach a thread without creating
@@ -1207,26 +1208,6 @@
         Trace::TraceOutputMode::kFile;
   }
 
-  {
-    auto&& profiler_options = runtime_options.ReleaseOrDefault(Opt::ProfilerOpts);
-    profile_output_filename_ = profiler_options.output_file_name_;
-
-    // TODO: Don't do this, just change ProfilerOptions to include the output file name?
-    ProfilerOptions other_options(
-        profiler_options.enabled_,
-        profiler_options.period_s_,
-        profiler_options.duration_s_,
-        profiler_options.interval_us_,
-        profiler_options.backoff_coefficient_,
-        profiler_options.start_immediately_,
-        profiler_options.top_k_threshold_,
-        profiler_options.top_k_change_threshold_,
-        profiler_options.profile_type_,
-        profiler_options.max_stack_depth_);
-
-    profiler_options_ = other_options;
-  }
-
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(runtime_options.GetOrDefault(Opt::ProfileClock));
 
@@ -1244,6 +1225,16 @@
   pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException());
   self->ClearException();
 
+  // Runtime initialization is largely done now.
+  // We load plugins first since that can modify the runtime state slightly.
+  // Load all plugins
+  for (auto& plugin : plugins_) {
+    std::string err;
+    if (!plugin.Load(&err)) {
+      LOG(FATAL) << plugin << " failed to load: " << err;
+    }
+  }
+
   // Look for a native bridge.
   //
   // The intended flow here is, in the case of a running system:
@@ -1276,6 +1267,20 @@
     is_native_bridge_loaded_ = LoadNativeBridge(native_bridge_file_name);
   }
 
+  // Startup agents
+  // TODO Maybe we should start a new thread to run these on. Investigate RI behavior more.
+  for (auto& agent : agents_) {
+    // TODO Check err
+    int res = 0;
+    std::string err = "";
+    ti::Agent::LoadError result = agent.Load(&res, &err);
+    if (result == ti::Agent::kInitializationError) {
+      LOG(FATAL) << "Unable to initialize agent!";
+    } else if (result != ti::Agent::kNoError) {
+      LOG(ERROR) << "Unable to load an agent: " << err;
+    }
+  }
+
   VLOG(startup) << "Runtime::Init exiting";
 
   return true;
@@ -1544,7 +1549,7 @@
   // Visiting the roots of these ArtMethods is not currently required since all the GcRoots are
   // null.
   BufferedRootVisitor<16> buffered_visitor(visitor, RootInfo(kRootVMInternal));
-  const size_t pointer_size = GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = GetClassLinker()->GetImagePointerSize();
   if (HasResolutionMethod()) {
     resolution_method_->VisitRoots(buffered_visitor, pointer_size);
   }
@@ -1628,7 +1633,7 @@
   ClassLinker* const class_linker = GetClassLinker();
   ArtMethod* method = class_linker->CreateRuntimeMethod(linear_alloc);
   // When compiling, the code pointer will get set later when the image is loaded.
-  const size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
+  const PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   if (IsAotCompiler()) {
     method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   } else {
@@ -1650,7 +1655,7 @@
   auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
   // When compiling, the code pointer will get set later when the image is loaded.
   if (IsAotCompiler()) {
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
+    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
     method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   } else {
     method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
@@ -1660,7 +1665,7 @@
 
 ArtMethod* Runtime::CreateCalleeSaveMethod() {
   auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
-  size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
+  PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   DCHECK_NE(instruction_set_, kNone);
   DCHECK(method->IsRuntimeMethod());
@@ -1673,7 +1678,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
   java_vm_->DisallowNewWeakGlobals();
   heap_->DisallowNewAllocationRecords();
-  lambda_box_table_->DisallowNewWeakBoxedLambdas();
 }
 
 void Runtime::AllowNewSystemWeaks() {
@@ -1682,7 +1686,6 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNormal);  // TODO: Do this in the sweeping.
   java_vm_->AllowNewWeakGlobals();
   heap_->AllowNewAllocationRecords();
-  lambda_box_table_->AllowNewWeakBoxedLambdas();
 }
 
 void Runtime::BroadcastForNewSystemWeaks() {
@@ -1693,7 +1696,6 @@
   intern_table_->BroadcastForNewInterns();
   java_vm_->BroadcastForNewWeakGlobals();
   heap_->BroadcastForNewAllocationRecords();
-  lambda_box_table_->BroadcastForNewWeakBoxedLambdas();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
@@ -1764,7 +1766,6 @@
     return;
   }
 
-  profile_output_filename_ = profile_output_filename;
   jit_->StartProfileSaver(profile_output_filename,
                           code_paths,
                           foreign_dex_profile_path,
@@ -1956,7 +1957,7 @@
 
 void Runtime::FixupConflictTables() {
   // We can only do this after the class linker is created.
-  const size_t pointer_size = GetClassLinker()->GetImagePointerSize();
+  const PointerSize pointer_size = GetClassLinker()->GetImagePointerSize();
   if (imt_unimplemented_method_->GetImtConflictTable(pointer_size) == nullptr) {
     imt_unimplemented_method_->SetImtConflictTable(
         ClassLinker::CreateImtConflictTable(/*count*/0u, GetLinearAlloc(), pointer_size),
@@ -1978,6 +1979,11 @@
   return verify_ == verifier::VerifyMode::kSoftFail;
 }
 
+bool Runtime::IsDeoptimizeable(uintptr_t code) const
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return !heap_->IsInBootImageOatFile(reinterpret_cast<void *>(code));
+}
+
 LinearAlloc* Runtime::CreateLinearAlloc() {
   // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a
   // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold
@@ -2010,9 +2016,4 @@
   return (jit_ != nullptr) && jit_->UseJitCompilation();
 }
 
-// Returns true if profile saving is enabled. GetJit() will be not null in this case.
-bool Runtime::SaveProfileInfo() const {
-  return (jit_ != nullptr) && jit_->SaveProfilingInfo();
-}
-
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1394462..6da60f2 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -36,7 +36,6 @@
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
-#include "profiler_options.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
@@ -55,10 +54,6 @@
   class JitOptions;
 }  // namespace jit
 
-namespace lambda {
-  class BoxTable;
-}  // namespace lambda
-
 namespace mirror {
   class ClassLoader;
   class Array;
@@ -68,6 +63,9 @@
   class String;
   class Throwable;
 }  // namespace mirror
+namespace ti {
+  class Agent;
+}  // namespace ti
 namespace verifier {
   class MethodVerifier;
   enum class VerifyMode : int8_t;
@@ -85,6 +83,7 @@
 class MonitorPool;
 class NullPointerHandler;
 class OatFileManager;
+class Plugin;
 struct RuntimeArgumentMap;
 class SignalCatcher;
 class StackOverflowHandler;
@@ -192,10 +191,6 @@
     return image_location_;
   }
 
-  const ProfilerOptions& GetProfilerOptions() const {
-    return profiler_options_;
-  }
-
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
@@ -392,10 +387,11 @@
 
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
-    kSaveAll,
-    kRefsOnly,
-    kRefsAndArgs,
-    kLastCalleeSaveType  // Value used for iteration
+    kSaveAllCalleeSaves,  // All callee-save registers.
+    kSaveRefsOnly,        // Only those callee-save registers that can hold references.
+    kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
+    kSaveEverything,      // All registers, including both callee-save and caller-save.
+    kLastCalleeSaveType   // Value used for iteration
   };
 
   bool HasCalleeSaveMethod(CalleeSaveType type) const {
@@ -455,8 +451,6 @@
 
   // Returns true if JIT compilations are enabled. GetJit() will be not null in this case.
   bool UseJitCompilation() const;
-  // Returns true if profile saving is enabled. GetJit() will be not null in this case.
-  bool SaveProfileInfo() const;
 
   void PreZygoteFork();
   bool InitZygote();
@@ -559,10 +553,6 @@
     return (experimental_flags_ & flags) != ExperimentalFlags::kNone;
   }
 
-  lambda::BoxTable* GetLambdaBoxTable() const {
-    return lambda_box_table_.get();
-  }
-
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
@@ -648,6 +638,10 @@
     return zygote_no_threads_;
   }
 
+  // Returns whether the code can be deoptimized. Code may be compiled with some
+  // optimization that makes it impossible to deoptimize.
+  bool IsDeoptimizeable(uintptr_t code) const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -708,6 +702,9 @@
   std::string class_path_string_;
   std::vector<std::string> properties_;
 
+  std::vector<ti::Agent> agents_;
+  std::vector<Plugin> plugins_;
+
   // The default stack size for managed threads created by the runtime.
   size_t default_stack_size_;
 
@@ -742,8 +739,6 @@
   std::unique_ptr<jit::Jit> jit_;
   std::unique_ptr<jit::JitOptions> jit_options_;
 
-  std::unique_ptr<lambda::BoxTable> lambda_box_table_;
-
   // Fault message, printed when we get a SIGSEGV.
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
@@ -778,9 +773,6 @@
 
   const bool is_running_on_memory_tool_;
 
-  std::string profile_output_filename_;
-  ProfilerOptions profiler_options_;
-
   std::unique_ptr<TraceConfig> trace_config_;
 
   instrumentation::Instrumentation instrumentation_;
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index bc963c5..60ebabc 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -28,6 +28,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
+#include "native_stack_dump.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 2a96703..146afc7 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -75,7 +75,6 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        JITInvokeTransitionWeight)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
-RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
@@ -105,7 +104,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
-RUNTIME_OPTIONS_KEY (TestProfilerOptions, ProfilerOpts)  // -Xenable-profiler, -Xprofile-*
+RUNTIME_OPTIONS_KEY (ProfileSaverOptions, ProfileSaverOpts)  // -Xjitsaveprofilinginfo, -Xps-*
 RUNTIME_OPTIONS_KEY (std::string,         Compiler)
 RUNTIME_OPTIONS_KEY (std::vector<std::string>, \
                                           CompilerOptions)  // -Xcompiler-option ...
@@ -118,7 +117,10 @@
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
-RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none, lambdas}
+RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{none, agents}
+RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>, Requires -Xexperimental:agents
+RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>, Requires -Xexperimental:agents
+RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library> Requires -Xexperimental:runtime-plugins
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
@@ -129,11 +131,12 @@
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
 RUNTIME_OPTIONS_KEY (int32_t (*)(FILE* stream, const char* format, va_list ap), \
                                           HookVfprintf,                   vfprintf)
+// Use _exit instead of exit so that we won't get DCHECK failures in global data
+// destructors. b/28106055.
 RUNTIME_OPTIONS_KEY (void (*)(int32_t status), \
-                                          HookExit,                       exit)
+                                          HookExit,                       _exit)
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
-RUNTIME_OPTIONS_KEY (std::string,         OatFileManagerCompilerFilter,   "speed")
 
 #undef RUNTIME_OPTIONS_KEY
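
A hedged sketch of how the new flags could reach the runtime through the JNI invocation API (assumes <jni.h>; the agent path and option payload are made-up placeholders, while the flag spellings come from the comments above):

  // Illustrative only; the library path and agent options are placeholders.
  JavaVMOption options[3];
  options[0].optionString = const_cast<char*>("-Xexperimental:agents");
  options[1].optionString = const_cast<char*>("-agentpath:/data/local/tmp/libmyagent.so=opts");
  options[2].optionString = const_cast<char*>("-Xjitsaveprofilinginfo");
  JavaVMInitArgs args;
  args.version = JNI_VERSION_1_6;
  args.nOptions = 3;
  args.options = options;
  args.ignoreUnrecognized = JNI_FALSE;
  // JNI_CreateJavaVM(&vm, &env, &args) would hand these strings to the parser
  // that consumes the RUNTIME_OPTIONS_KEY table in this file.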
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 4610f6f..5fcb86e 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -29,8 +29,8 @@
 #include "jit/jit_code_cache.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
 #include "arch/instruction_set.h"
+#include "jit/profile_saver_options.h"
 #include "verifier/verify_mode.h"
 #include <stdio.h>
 #include <stdarg.h>
@@ -41,7 +41,6 @@
 class DexFile;
 struct XGcOption;
 struct BackgroundGcOption;
-struct TestProfilerOptions;
 
 #define DECLARE_KEY(Type, Name) static const Key<Type> Name
 
@@ -73,7 +72,7 @@
     using Key = RuntimeArgumentMapKey<TValue>;
 
     // List of key declarations, shorthand for 'static const Key<T> Name'
-#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> Name;
+#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> (Name);
 #include "runtime_options.def"
   };
 
diff --git a/runtime/signal_set.h b/runtime/signal_set.h
index c272514..6f88852 100644
--- a/runtime/signal_set.h
+++ b/runtime/signal_set.h
@@ -38,8 +38,8 @@
   }
 
   void Block() {
-    if (sigprocmask(SIG_BLOCK, &set_, nullptr) == -1) {
-      PLOG(FATAL) << "sigprocmask failed";
+    if (pthread_sigmask(SIG_BLOCK, &set_, nullptr) != 0) {
+      PLOG(FATAL) << "pthread_sigmask failed";
     }
   }
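
The switch from sigprocmask() to pthread_sigmask() also changes the error-reporting convention: pthread_sigmask() returns the error number directly and does not set errno. A hedged variant that forwards that code to errno so PLOG can report the cause (a sketch, not part of this change):

  void Block() {
    int rc = pthread_sigmask(SIG_BLOCK, &set_, nullptr);
    if (rc != 0) {
      errno = rc;  // pthread_sigmask() leaves errno untouched on failure.
      PLOG(FATAL) << "pthread_sigmask failed";
    }
  }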
 
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index 5c71da6..e39af2d 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -22,6 +22,9 @@
   code_simulator.cc \
   code_simulator_arm64.cc
 
+LIBART_SIMULATOR_CFLAGS := \
+  -DVIXL_INCLUDE_SIMULATOR_AARCH64
+
 # $(1): target or host
 # $(2): ndebug or debug
 define build-libart-simulator
@@ -54,6 +57,7 @@
   LOCAL_MODULE_CLASS := SHARED_LIBRARIES
 
   LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+  LOCAL_CFLAGS := $$(LIBART_SIMULATOR_CFLAGS)
 
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
@@ -65,8 +69,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
@@ -86,9 +92,9 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For simulator_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixl
+    LOCAL_SHARED_LIBRARIES += libvixld-arm64
   else
-     LOCAL_SHARED_LIBRARIES += libvixl
+    LOCAL_SHARED_LIBRARIES += libvixl-arm64
   endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
diff --git a/runtime/simulator/code_simulator_arm64.cc b/runtime/simulator/code_simulator_arm64.cc
index 39dfa6d..897d4f5 100644
--- a/runtime/simulator/code_simulator_arm64.cc
+++ b/runtime/simulator/code_simulator_arm64.cc
@@ -16,13 +16,15 @@
 
 #include "simulator/code_simulator_arm64.h"
 
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
 namespace art {
 namespace arm64 {
 
-// VIXL has not been tested on 32bit architectures, so vixl::Simulator is not always
+// VIXL has not been tested on 32bit architectures, so Simulator is not always
 // available. To avoid linker errors on these architectures, we check whether we can simulate
 // at the beginning of the following methods, using the compile-time constant `kCanSimulate`.
-// TODO: when vixl::Simulator is always available, remove the these checks.
+// TODO: when Simulator is always available, remove these checks.
 
 CodeSimulatorArm64* CodeSimulatorArm64::CreateCodeSimulatorArm64() {
   if (kCanSimulate) {
@@ -35,8 +37,8 @@
 CodeSimulatorArm64::CodeSimulatorArm64()
     : CodeSimulator(), decoder_(nullptr), simulator_(nullptr) {
   DCHECK(kCanSimulate);
-  decoder_ = new vixl::Decoder();
-  simulator_ = new vixl::Simulator(decoder_);
+  decoder_ = new Decoder();
+  simulator_ = new Simulator(decoder_);
 }
 
 CodeSimulatorArm64::~CodeSimulatorArm64() {
@@ -47,22 +49,22 @@
 
 void CodeSimulatorArm64::RunFrom(intptr_t code_buffer) {
   DCHECK(kCanSimulate);
-  simulator_->RunFrom(reinterpret_cast<const vixl::Instruction*>(code_buffer));
+  simulator_->RunFrom(reinterpret_cast<const Instruction*>(code_buffer));
 }
 
 bool CodeSimulatorArm64::GetCReturnBool() const {
   DCHECK(kCanSimulate);
-  return simulator_->wreg(0);
+  return simulator_->ReadWRegister(0);
 }
 
 int32_t CodeSimulatorArm64::GetCReturnInt32() const {
   DCHECK(kCanSimulate);
-  return simulator_->wreg(0);
+  return simulator_->ReadWRegister(0);
 }
 
 int64_t CodeSimulatorArm64::GetCReturnInt64() const {
   DCHECK(kCanSimulate);
-  return simulator_->xreg(0);
+  return simulator_->ReadXRegister(0);
 }
 
 }  // namespace arm64
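
Taken together, the changes above migrate from the flat vixl:: namespace and accessor names to the VIXL AArch64 API. A minimal usage sketch with the new names (the code buffer is assumed to hold valid AArch64 instructions):

  // Illustrative only: drive the VIXL AArch64 simulator with the renamed API.
  vixl::aarch64::Decoder decoder;
  vixl::aarch64::Simulator simulator(&decoder);
  simulator.RunFrom(reinterpret_cast<const vixl::aarch64::Instruction*>(code_buffer));
  int32_t w0 = simulator.ReadWRegister(0);  // Was simulator.wreg(0).
  int64_t x0 = simulator.ReadXRegister(0);  // Was simulator.xreg(0).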
diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h
index 10fceb9..59ea34f 100644
--- a/runtime/simulator/code_simulator_arm64.h
+++ b/runtime/simulator/code_simulator_arm64.h
@@ -19,10 +19,11 @@
 
 #include "memory"
 #include "simulator/code_simulator.h"
-// TODO: make vixl clean wrt -Wshadow.
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
-#include "vixl/a64/simulator-a64.h"
+#include "aarch64/simulator-aarch64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
@@ -42,10 +43,10 @@
  private:
   CodeSimulatorArm64();
 
-  vixl::Decoder* decoder_;
-  vixl::Simulator* simulator_;
+  vixl::aarch64::Decoder* decoder_;
+  vixl::aarch64::Simulator* simulator_;
 
-  // TODO: Enable CodeSimulatorArm64 for more host ISAs once vixl::Simulator supports them.
+  // TODO: Enable CodeSimulatorArm64 for more host ISAs once Simulator supports them.
   static constexpr bool kCanSimulate = (kRuntimeISA == kX86_64);
 
   DISALLOW_COPY_AND_ASSIGN(CodeSimulatorArm64);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index a5ca527..ababf78 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -18,6 +18,7 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/hex_dump.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
@@ -131,16 +132,10 @@
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
       CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
       DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
-      bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve;
-      return allow_resolve
-          ? GetResolvedMethod<true>(*GetCurrentQuickFrame(),
-                                    inline_info,
-                                    encoding.inline_info_encoding,
-                                    depth_in_stack_map)
-          : GetResolvedMethod<false>(*GetCurrentQuickFrame(),
-                                     inline_info,
-                                     encoding.inline_info_encoding,
-                                     depth_in_stack_map);
+      return GetResolvedMethod(*GetCurrentQuickFrame(),
+                               inline_info,
+                               encoding.inline_info_encoding,
+                               depth_in_stack_map);
     } else {
       return *cur_quick_frame_;
     }
@@ -173,7 +168,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_);
 
 mirror::Object* StackVisitor::GetThisObject() const {
-  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
     return nullptr;
@@ -734,7 +729,7 @@
   Runtime* runtime = Runtime::Current();
 
   if (method->IsAbstract()) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
   // This goes before IsProxyMethod since runtime methods have a null declaring class.
@@ -748,18 +743,20 @@
     // compiled method without any stubs. Therefore the method must have an OatQuickMethodHeader.
     DCHECK(!method->IsDirect() && !method->IsConstructor())
         << "Constructors of proxy classes must have a OatQuickMethodHeader";
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
   }
 
   // The only remaining case is if the method is native and uses the generic JNI stub.
   DCHECK(method->IsNative());
   ClassLinker* class_linker = runtime->GetClassLinker();
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method, sizeof(void*));
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method,
+                                                                           kRuntimePointerSize);
   DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << PrettyMethod(method);
   // Generic JNI frame.
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
-  QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  QuickMethodFrameInfo callee_info =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
 
   // Callee saves + handle scope + method ref + alignment
   // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
@@ -791,8 +788,7 @@
         cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
         SanityCheckFrame();
 
-        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames ||
-             walk_kind_ == StackWalkKind::kIncludeInlinedFramesNoResolve)
+        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
             && (cur_oat_quick_method_header_ != nullptr)
             && cur_oat_quick_method_header_->IsOptimized()) {
           CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo();
@@ -838,10 +834,12 @@
             const instrumentation::InstrumentationStackFrame& instrumentation_frame =
                 GetInstrumentationStackFrame(thread_, instrumentation_stack_depth);
             instrumentation_stack_depth++;
-            if (GetMethod() == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAll)) {
+            if (GetMethod() ==
+                Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
               // Skip runtime save all callee frames which are used to deliver exceptions.
             } else if (instrumentation_frame.interpreter_entry_) {
-              ArtMethod* callee = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+              ArtMethod* callee =
+                  Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
                                             << PrettyMethod(GetMethod());
             } else {
@@ -915,7 +913,7 @@
 int StackVisitor::GetVRegOffsetFromQuickCode(const DexFile::CodeItem* code_item,
                                              uint32_t core_spills, uint32_t fp_spills,
                                              size_t frame_size, int reg, InstructionSet isa) {
-  size_t pointer_size = InstructionSetPointerSize(isa);
+  PointerSize pointer_size = InstructionSetPointerSize(isa);
   if (kIsDebugBuild) {
     auto* runtime = Runtime::Current();
     if (runtime != nullptr) {
@@ -938,7 +936,8 @@
      * Special temporaries may have custom locations and the logic above deals with that.
      * However, non-special temporaries are placed relative to the outs.
      */
-    int temps_start = code_item->outs_size_ * sizeof(uint32_t) + pointer_size /* art method */;
+    int temps_start = code_item->outs_size_ * sizeof(uint32_t)
+        + static_cast<size_t>(pointer_size) /* art method */;
     int relative_offset = (reg - (temp_threshold + max_num_special_temps)) * sizeof(uint32_t);
     return temps_start + relative_offset;
   }  else if (reg < num_regs) {
@@ -946,7 +945,8 @@
     return locals_start + (reg * sizeof(uint32_t));
   } else {
     // Handle ins.
-    return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + pointer_size /* art method */;
+    return frame_size + ((reg - num_regs) * sizeof(uint32_t))
+        + static_cast<size_t>(pointer_size) /* art method */;
   }
 }
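
A small worked example of the vreg offsets computed above, under assumed values (purely illustrative):

  // Worked example (assumed values, not from the patch):
  //   outs_size_ = 2, pointer_size = 8 (64-bit ART method pointer)
  //   temps_start            = 2 * sizeof(uint32_t) + 8 = 16 bytes
  //   first non-special temp = temps_start + 0 * 4      = 16 bytes
  //   first 'in' vreg        = frame_size + 0 * 4 + 8   = frame_size + 8 bytes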
 
diff --git a/runtime/stack.h b/runtime/stack.h
index e77ab46..cf33ae1 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -210,6 +210,10 @@
     code_item_ = code_item;
   }
 
+  const DexFile::CodeItem* GetCodeItem() const {
+    return code_item_;
+  }
+
   float GetVRegFloat(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     // NOTE: Strict-aliasing?
@@ -407,6 +411,10 @@
     return dex_pc_ptr_;
   }
 
+  void SetDexPCPtr(uint16_t* dex_pc_ptr) {
+    dex_pc_ptr_ = dex_pc_ptr;
+  }
+
   JValue* GetResultRegister() {
     return result_register_;
   }
@@ -568,7 +576,6 @@
   // when walking the stack.
   enum class StackWalkKind {
     kIncludeInlinedFrames,
-    kIncludeInlinedFramesNoResolve,
     kSkipInlinedFrames,
   };
 
@@ -727,7 +734,7 @@
 
   static int GetOutVROffset(uint16_t out_num, InstructionSet isa) {
     // According to the stack model, the first out is above the Method reference.
-    return InstructionSetPointerSize(isa) + out_num * sizeof(uint32_t);
+    return static_cast<size_t>(InstructionSetPointerSize(isa)) + out_num * sizeof(uint32_t);
   }
 
   bool IsInInlinedFrame() const {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 7c50f97..dd7e531 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -19,6 +19,7 @@
 
 #include "base/bit_vector.h"
 #include "base/bit_utils.h"
+#include "dex_file.h"
 #include "memory_region.h"
 #include "leb128.h"
 
@@ -892,7 +893,11 @@
     total_bit_size_ += MinimumBitsToStore(method_index_max);
 
     dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
-    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    // Note: We're not encoding the dex pc if there is none. That's the case
+    // for an intrinsified native method, such as String.charAt().
+    if (dex_pc_max != DexFile::kDexNoIndex) {
+      total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    }
 
     invoke_type_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
     total_bit_size_ += MinimumBitsToStore(invoke_type_max);
@@ -1045,7 +1050,7 @@
       inline_info_encoding = *reinterpret_cast<const InlineInfoEncoding*>(ptr);
       ptr += sizeof(InlineInfoEncoding);
     } else {
-      inline_info_encoding = InlineInfoEncoding{}; // NOLINT.
+      inline_info_encoding = InlineInfoEncoding{};  // NOLINT.
     }
     header_size = dchecked_integral_cast<uint8_t>(ptr - reinterpret_cast<const uint8_t*>(data));
   }
diff --git a/compiler/utils/string_reference.h b/runtime/string_reference.h
similarity index 68%
rename from compiler/utils/string_reference.h
rename to runtime/string_reference.h
index 72552f2..c75c218 100644
--- a/compiler/utils/string_reference.h
+++ b/runtime/string_reference.h
@@ -14,26 +14,39 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_UTILS_STRING_REFERENCE_H_
-#define ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#ifndef ART_RUNTIME_STRING_REFERENCE_H_
+#define ART_RUNTIME_STRING_REFERENCE_H_
 
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "dex_file-inl.h"
 #include "utf-inl.h"
 
 namespace art {
 
-class DexFile;
-
-// A string is uniquely located by its DexFile and the string_ids_ table index into that DexFile.
+// A string is located by its DexFile and the string_ids_ table index into that DexFile.
 struct StringReference {
   StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
 
+  const char* GetStringData() const {
+    return dex_file->GetStringData(dex_file->GetStringId(string_index));
+  }
+
   const DexFile* dex_file;
   uint32_t string_index;
 };
 
+// Compare only the reference and not the string contents.
+struct StringReferenceComparator {
+  bool operator()(const StringReference& a, const StringReference& b) {
+    if (a.dex_file != b.dex_file) {
+      return a.dex_file < b.dex_file;
+    }
+    return a.string_index < b.string_index;
+  }
+};
+
 // Compare the actual referenced string values. Used for string reference deduplication.
 struct StringReferenceValueComparator {
   bool operator()(StringReference sr1, StringReference sr2) const {
@@ -46,19 +59,17 @@
       // Use the string order enforced by the dex file verifier.
       DCHECK_EQ(
           sr1.string_index < sr2.string_index,
-          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-              sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-              sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0);
+          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                  sr2.GetStringData()) < 0);
       return sr1.string_index < sr2.string_index;
     } else {
       // Cannot compare indexes, so do the string comparison.
-      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-          sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-          sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0;
+      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                     sr2.GetStringData()) < 0;
     }
   }
 };
 
 }  // namespace art
 
-#endif  // ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#endif  // ART_RUNTIME_STRING_REFERENCE_H_
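
A hedged sketch contrasting the two comparators now in this header; the variables are illustrative, but the semantics follow the comments above (identity of the (dex file, index) pair versus the code point order of the referenced text):

  // Illustrative only: dex_file_a/dex_file_b stand for two loaded DexFile pointers.
  StringReferenceComparator by_reference;
  StringReferenceValueComparator by_value;
  StringReference a(dex_file_a, /* string_index */ 0);
  StringReference b(dex_file_b, /* string_index */ 3);
  bool ref_less = by_reference(a, b);  // Orders by (dex_file pointer, index) only.
  bool text_less = by_value(a, b);     // Orders by the referenced string contents.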
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index f5d20bd..216d8a7 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,7 +19,7 @@
 
 #include "thread.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <bionic_tls.h>  // Access to our own TLS slot.
 #endif
 
@@ -45,7 +45,7 @@
   if (!is_started_) {
     return nullptr;
   } else {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
 #else
     void* thread = pthread_getspecific(Thread::pthread_key_self_);
@@ -93,6 +93,18 @@
   return static_cast<ThreadState>(old_state_and_flags.as_struct.state);
 }
 
+inline bool Thread::IsThreadSuspensionAllowable() const {
+  if (tls32_.no_thread_suspension != 0) {
+    return false;
+  }
+  for (int i = kLockLevelCount - 1; i >= 0; --i) {
+    if (i != kMutatorLock && GetHeldMutex(static_cast<LockLevel>(i)) != nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
   if (kIsDebugBuild) {
     if (gAborting == 0) {
@@ -201,12 +213,23 @@
                  << " state=" << old_state_and_flags.as_struct.state;
     } else if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
       // Wait while our suspend count is non-zero.
-      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+
+      // We pass null to the MutexLock as we may be in a situation where the
+      // runtime is shutting down. Guarding ourselves from that situation
+      // requires taking the shutdown lock, which is undesirable here.
+      Thread* thread_to_pass = nullptr;
+      if (kIsDebugBuild && !IsDaemon()) {
+        // We know we can make our debug locking checks on non-daemon threads,
+        // so re-enable them on debug builds.
+        thread_to_pass = this;
+      }
+      MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
+      ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
       old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
       DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
         // Re-check when Thread::resume_cond_ is notified.
-        Thread::resume_cond_->Wait(this);
+        Thread::resume_cond_->Wait(thread_to_pass);
         old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
         DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 16f30cd..79b9f02 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,8 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
+#include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "quick_exception_handler.h"
@@ -84,6 +86,8 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
@@ -270,7 +274,6 @@
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
   if (must_be_present) {
     DCHECK(record != nullptr);
-    DCHECK_EQ(record->GetType(), type);
   } else {
     if (record == nullptr || record->GetType() != type) {
       return nullptr;
@@ -513,14 +516,20 @@
   return stack_size;
 }
 
+// Return the nearest page-aligned address below the current stack top.
+NO_INLINE
+static uint8_t* FindStackTop() {
+  return reinterpret_cast<uint8_t*>(
+      AlignDown(__builtin_frame_address(0), kPageSize));
+}
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_begin_.
 ATTRIBUTE_NO_SANITIZE_ADDRESS
 void Thread::InstallImplicitProtection() {
   uint8_t* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  uint8_t* stack_himem = tlsPtr_.stack_end;
-  uint8_t* stack_top = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(&stack_himem) &
-      ~(kPageSize - 1));    // Page containing current top of stack.
+  // Page containing current top of stack.
+  uint8_t* stack_top = FindStackTop();
 
   // Try to directly protect the stack.
   VLOG(threads) << "installing stack protected region at " << std::hex <<
@@ -705,7 +714,7 @@
   InitTid();
   interpreter::InitInterpreterTls(this);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
@@ -916,10 +925,22 @@
 
   Runtime* runtime = Runtime::Current();
   bool implicit_stack_check = !runtime->ExplicitStackOverflowChecks() && !runtime->IsAotCompiler();
+
+  // Valgrind on arm doesn't give the right values here. Do not install the guard page, and
+  // effectively disable stack overflow checks (we'll get segfaults, potentially) by setting
+  // stack_begin to 0.
+  const bool valgrind_on_arm =
+      (kRuntimeISA == kArm || kRuntimeISA == kArm64) &&
+      kMemoryToolIsValgrind &&
+      RUNNING_ON_MEMORY_TOOL != 0;
+  if (valgrind_on_arm) {
+    tlsPtr_.stack_begin = nullptr;
+  }
+
   ResetDefaultStackEnd();
 
   // Install the protected region if we are doing implicit overflow checks.
-  if (implicit_stack_check) {
+  if (implicit_stack_check && !valgrind_on_arm) {
     // The thread might have protected region at the bottom.  We need
     // to install our own region so we need to move the limits
     // of the stack to make room for it.
@@ -932,8 +953,7 @@
   }
 
   // Sanity check.
-  int stack_variable;
-  CHECK_GT(&stack_variable, reinterpret_cast<void*>(tlsPtr_.stack_end));
+  CHECK_GT(FindStackTop(), reinterpret_cast<void*>(tlsPtr_.stack_end));
 
   return true;
 }
@@ -1115,32 +1135,36 @@
 }
 
 void Thread::RunCheckpointFunction() {
-  Closure *checkpoints[kMaxCheckpoints];
-
-  // Grab the suspend_count lock and copy the current set of
-  // checkpoints.  Then clear the list and the flag.  The RequestCheckpoint
-  // function will also grab this lock so we prevent a race between setting
-  // the kCheckpointRequest flag and clearing it.
-  {
-    MutexLock mu(this, *Locks::thread_suspend_count_lock_);
-    for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-      checkpoints[i] = tlsPtr_.checkpoint_functions[i];
-      tlsPtr_.checkpoint_functions[i] = nullptr;
+  bool done = false;
+  do {
+    // Grab the suspend_count lock and copy the checkpoints one by one. When the last checkpoint is
+    // copied, clear the list and the flag. The RequestCheckpoint function will also grab this lock
+    // to prevent a race between setting the kCheckpointRequest flag and clearing it.
+    Closure* checkpoint = nullptr;
+    {
+      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+      if (tlsPtr_.checkpoint_function != nullptr) {
+        checkpoint = tlsPtr_.checkpoint_function;
+        if (!checkpoint_overflow_.empty()) {
+          // Overflow list not empty, copy the first one out and continue.
+          tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
+          checkpoint_overflow_.pop_front();
+        } else {
+          // No overflow checkpoints, this means that we are on the last pending checkpoint.
+          tlsPtr_.checkpoint_function = nullptr;
+          AtomicClearFlag(kCheckpointRequest);
+          done = true;
+        }
+      } else {
+        LOG(FATAL) << "Checkpoint flag set without pending checkpoint";
+      }
     }
-    AtomicClearFlag(kCheckpointRequest);
-  }
 
-  // Outside the lock, run all the checkpoint functions that
-  // we collected.
-  bool found_checkpoint = false;
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    if (checkpoints[i] != nullptr) {
-      ScopedTrace trace("Run checkpoint function");
-      checkpoints[i]->Run(this);
-      found_checkpoint = true;
-    }
-  }
-  CHECK(found_checkpoint);
+    // Outside the lock, run the checkpoint functions that we collected.
+    ScopedTrace trace("Run checkpoint function");
+    DCHECK(checkpoint != nullptr);
+    checkpoint->Run(this);
+  } while (!done);
 }
 
 bool Thread::RequestCheckpoint(Closure* function) {
@@ -1150,20 +1174,6 @@
     return false;  // Fail, thread is suspended and so can't run a checkpoint.
   }
 
-  uint32_t available_checkpoint = kMaxCheckpoints;
-  for (uint32_t i = 0 ; i < kMaxCheckpoints; ++i) {
-    if (tlsPtr_.checkpoint_functions[i] == nullptr) {
-      available_checkpoint = i;
-      break;
-    }
-  }
-  if (available_checkpoint == kMaxCheckpoints) {
-    // No checkpoint functions available, we can't run a checkpoint
-    return false;
-  }
-  tlsPtr_.checkpoint_functions[available_checkpoint] = function;
-
-  // Checkpoint function installed now install flag bit.
   // We must be runnable to request a checkpoint.
   DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
   union StateAndFlags new_state_and_flags;
@@ -1171,11 +1181,13 @@
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
       old_state_and_flags.as_int, new_state_and_flags.as_int);
-  if (UNLIKELY(!success)) {
-    // The thread changed state before the checkpoint was installed.
-    CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
-    tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
-  } else {
+  if (success) {
+    // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+    if (tlsPtr_.checkpoint_function == nullptr) {
+      tlsPtr_.checkpoint_function = function;
+    } else {
+      checkpoint_overflow_.push_back(function);
+    }
     CHECK_EQ(ReadFlag(kCheckpointRequest), true);
     TriggerSuspend();
   }
@@ -1205,10 +1217,8 @@
   ScopedTrace trace(__FUNCTION__);
   VLOG(threads) << this << " self-suspending";
   // Make thread appear suspended to other threads, release mutator_lock_.
-  tls32_.suspended_at_suspend_check = true;
   // Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
   ScopedThreadSuspension(this, kSuspended);
-  tls32_.suspended_at_suspend_check = false;
   VLOG(threads) << this << " self-reviving";
 }
 
@@ -1372,7 +1382,7 @@
     if (m->IsRuntimeMethod()) {
       return true;
     }
-    m = m->GetInterfaceMethodIfProxy(sizeof(void*));
+    m = m->GetInterfaceMethodIfProxy(kRuntimePointerSize);
     const int kMaxRepetition = 3;
     mirror::Class* c = m->GetDeclaringClass();
     mirror::DexCache* dex_cache = c->GetDexCache();
@@ -1421,6 +1431,12 @@
     if (o == nullptr) {
       os << "an unknown object";
     } else {
+      if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) {
+        // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
+        // may not have been flipped yet and "o" may be a from-space (stale) ref, in which case the
+        // IdentityHashCode call below will crash. So explicitly mark/forward it here.
+        o = ReadBarrier::Mark(o);
+      }
       if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
           Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) {
         // Getting the identity hashcode here would result in lock inflation and suspension of the
@@ -1543,7 +1559,7 @@
     LOG(WARNING) << "Native thread exiting without having called DetachCurrentThread (maybe it's "
         "going to use a pthread_key_create destructor?): " << *self;
     CHECK(is_started_);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     __get_tls()[TLS_SLOT_ART_THREAD_SELF] = self;
 #else
     CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, self), "reattach self");
@@ -1617,15 +1633,13 @@
   std::fill(tlsPtr_.rosalloc_runs,
             tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    tlsPtr_.checkpoint_functions[i] = nullptr;
-  }
+  tlsPtr_.checkpoint_function = nullptr;
   for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
     tlsPtr_.active_suspend_barriers[i] = nullptr;
   }
   tlsPtr_.flip_function = nullptr;
   tlsPtr_.thread_local_mark_stack = nullptr;
-  tls32_.suspended_at_suspend_check = false;
+  tls32_.is_transitioning_to_runnable = false;
 }
 
 bool Thread::IsStillStarting() const {
@@ -1760,11 +1774,10 @@
   }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
-  CHECK(tlsPtr_.checkpoint_functions[0] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[1] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[2] == nullptr);
+  CHECK(tlsPtr_.checkpoint_function == nullptr);
+  CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
-  CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+  CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
 
   // Make sure we processed all deoptimization requests.
   CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
@@ -1809,22 +1822,12 @@
   ScopedLocalRef<jthrowable> exception(tlsPtr_.jni_env, tlsPtr_.jni_env->ExceptionOccurred());
   tlsPtr_.jni_env->ExceptionClear();
 
-  // If the thread has its own handler, use that.
-  ScopedLocalRef<jobject> handler(tlsPtr_.jni_env,
-                                  tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                      WellKnownClasses::java_lang_Thread_uncaughtHandler));
-  if (handler.get() == nullptr) {
-    // Otherwise use the thread group's default handler.
-    handler.reset(tlsPtr_.jni_env->GetObjectField(peer.get(),
-                                                  WellKnownClasses::java_lang_Thread_group));
-  }
+  // Call the Thread instance's dispatchUncaughtException(Throwable) method.
+  tlsPtr_.jni_env->CallVoidMethod(peer.get(),
+      WellKnownClasses::java_lang_Thread_dispatchUncaughtException,
+      exception.get());
 
-  // Call the handler.
-  tlsPtr_.jni_env->CallVoidMethod(handler.get(),
-      WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException,
-      peer.get(), exception.get());
-
-  // If the handler threw, clear that exception too.
+  // If the dispatchUncaughtException threw, clear that exception too.
   tlsPtr_.jni_env->ExceptionClear();
 }
 
@@ -2103,7 +2106,7 @@
   // the i'th frame.
   mirror::ObjectArray<mirror::Object>* trace_;
   // For cross compilation.
-  const size_t pointer_size_;
+  const PointerSize pointer_size_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildInternalStackTraceVisitor);
 };
@@ -2190,9 +2193,9 @@
     mirror::PointerArray* const method_trace =
         down_cast<mirror::PointerArray*>(decoded_traces->Get(0));
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
-    ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, sizeof(void*));
+    ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, kRuntimePointerSize);
     uint32_t dex_pc = method_trace->GetElementPtrSize<uint32_t>(
-        i + method_trace->GetLength() / 2, sizeof(void*));
+        i + method_trace->GetLength() / 2, kRuntimePointerSize);
     int32_t line_number;
     StackHandleScope<3> hs(soa.Self());
     auto class_name_object(hs.NewHandle<mirror::String>(nullptr));
@@ -2223,7 +2226,7 @@
         }
       }
     }
-    const char* method_name = method->GetInterfaceMethodIfProxy(sizeof(void*))->GetName();
+    const char* method_name = method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName();
     CHECK(method_name != nullptr);
     Handle<mirror::String> method_name_object(
         hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), method_name)));
@@ -2393,21 +2396,23 @@
   std::string str(ss.str());
   // log to stderr for debugging command line processes
   std::cerr << str;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // log to logcat for debugging frameworks processes
   LOG(INFO) << str;
 #endif
 }
 
 // Explicitly instantiate 32 and 64bit thread offset dumping support.
-template void Thread::DumpThreadOffset<4>(std::ostream& os, uint32_t offset);
-template void Thread::DumpThreadOffset<8>(std::ostream& os, uint32_t offset);
+template
+void Thread::DumpThreadOffset<PointerSize::k32>(std::ostream& os, uint32_t offset);
+template
+void Thread::DumpThreadOffset<PointerSize::k64>(std::ostream& os, uint32_t offset);
 
-template<size_t ptr_size>
+template<PointerSize ptr_size>
 void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
 #define DO_THREAD_OFFSET(x, y) \
-    if (offset == x.Uint32Value()) { \
-      os << y; \
+    if (offset == (x).Uint32Value()) { \
+      os << (y); \
       return; \
     }
   DO_THREAD_OFFSET(ThreadFlagsOffset<ptr_size>(), "state_and_flags")
@@ -2566,9 +2571,41 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
-  QUICK_ENTRY_POINT_INFO(pReadBarrierMark)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg00)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg01)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg02)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg03)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg04)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg05)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg06)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg07)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg08)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg09)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg10)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg11)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg12)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg13)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg14)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg15)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg16)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg17)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg18)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg19)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg20)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg21)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg22)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg23)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg24)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg25)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg26)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg27)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg28)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
+
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastStart)
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastEnd)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
@@ -2578,38 +2615,42 @@
   // Get exception from thread.
   mirror::Throwable* exception = GetException();
   CHECK(exception != nullptr);
-  bool is_deoptimization = (exception == GetDeoptimizationException());
-  if (!is_deoptimization) {
-    // This is a real exception: let the instrumentation know about it.
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-    if (instrumentation->HasExceptionCaughtListeners() &&
-        IsExceptionThrownByCurrentMethod(exception)) {
-      // Instrumentation may cause GC so keep the exception object safe.
-      StackHandleScope<1> hs(this);
-      HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
-      instrumentation->ExceptionCaughtEvent(this, exception);
-    }
-    // Does instrumentation need to deoptimize the stack?
-    // Note: we do this *after* reporting the exception to instrumentation in case it
-    // now requires deoptimization. It may happen if a debugger is attached and requests
-    // new events (single-step, breakpoint, ...) when the exception is reported.
-    is_deoptimization = Dbg::IsForcedInterpreterNeededForException(this);
-    if (is_deoptimization) {
+  if (exception == GetDeoptimizationException()) {
+    artDeoptimize(this);
+    UNREACHABLE();
+  }
+
+  // This is a real exception: let the instrumentation know about it.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->HasExceptionCaughtListeners() &&
+      IsExceptionThrownByCurrentMethod(exception)) {
+    // Instrumentation may cause GC so keep the exception object safe.
+    StackHandleScope<1> hs(this);
+    HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
+    instrumentation->ExceptionCaughtEvent(this, exception);
+  }
+  // Does instrumentation need to deoptimize the stack?
+  // Note: we do this *after* reporting the exception to instrumentation in case it
+  // now requires deoptimization. It may happen if a debugger is attached and requests
+  // new events (single-step, breakpoint, ...) when the exception is reported.
+  if (Dbg::IsForcedInterpreterNeededForException(this)) {
+    NthCallerVisitor visitor(this, 0, false);
+    visitor.WalkStack();
+    if (Runtime::Current()->IsDeoptimizeable(visitor.caller_pc)) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
       PushDeoptimizationContext(
           JValue(), /*is_reference */ false, /* from_code */ false, exception);
+      artDeoptimize(this);
+      UNREACHABLE();
     }
   }
+
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
-  QuickExceptionHandler exception_handler(this, is_deoptimization);
-  if (is_deoptimization) {
-    exception_handler.DeoptimizeStack();
-  } else {
-    exception_handler.FindCatch(exception);
-  }
+  QuickExceptionHandler exception_handler(this, false);
+  exception_handler.FindCatch(exception);
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
 }
@@ -3019,7 +3060,6 @@
   mirror::Throwable* pending_exception = nullptr;
   bool from_code = false;
   PopDeoptimizationContext(result, &pending_exception, &from_code);
-  CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
   SetTopOfStack(nullptr);
   SetTopOfShadowStack(shadow_frame);
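
The checkpoint rework above replaces the fixed three-slot array with a single pending slot plus an overflow list, so RequestCheckpoint() no longer fails for lack of a free slot, only when the target is not runnable. A hedged sketch of the requester side (the Closure subclass and the fallback policy are illustrative, not from this change):

  // Illustrative only.
  class InspectThreadClosure : public Closure {
   public:
    void Run(Thread* thread) OVERRIDE {
      // Runs on |thread| the next time it polls the kCheckpointRequest flag.
    }
  };

  InspectThreadClosure closure;
  if (!target_thread->RequestCheckpoint(&closure)) {
    // Target was not runnable; callers typically suspend it and run the work
    // directly instead (an assumption about caller policy).
  }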
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 582a0cd..1c2d4ab 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -28,6 +28,7 @@
 #include "arch/context.h"
 #include "arch/instruction_set.h"
 #include "atomic.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "entrypoints/jni/jni_entrypoints.h"
@@ -110,7 +111,6 @@
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
   kDeoptimizationShadowFrame,
-  kSingleFrameDeoptimizationShadowFrame
 };
 
 // This should match RosAlloc::kNumThreadLocalSizeBrackets.
@@ -180,7 +180,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Translates 172 to pAllocArrayFromCode and so on.
-  template<size_t size_of_pointers>
+  template<PointerSize size_of_pointers>
   static void DumpThreadOffset(std::ostream& os, uint32_t offset);
 
   // Dumps a one-line summary of thread state (used for operator<<).
@@ -288,6 +288,9 @@
 
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
+  // Return true if thread suspension is allowable.
+  bool IsThreadSuspensionAllowable() const;
+
   bool IsDaemon() const {
     return tls32_.daemon;
   }
@@ -530,21 +533,21 @@
   // Offsets of various members of native Thread class, used by compiled code.
   //
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThinLockIdOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
         OFFSETOF_MEMBER(tls_32bit_sized_values, thin_lock_thread_id));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadFlagsOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
         OFFSETOF_MEMBER(tls_32bit_sized_values, state_and_flags));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> IsGcMarkingOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
@@ -555,121 +558,125 @@
   void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
     size_t base = OFFSETOF_MEMBER(Thread, tlsPtr_);
     size_t scale;
     size_t shrink;
-    if (pointer_size == sizeof(void*)) {
+    if (pointer_size == kRuntimePointerSize) {
       scale = 1;
       shrink = 1;
-    } else if (pointer_size > sizeof(void*)) {
-      scale = pointer_size / sizeof(void*);
+    } else if (pointer_size > kRuntimePointerSize) {
+      scale = static_cast<size_t>(pointer_size) / static_cast<size_t>(kRuntimePointerSize);
       shrink = 1;
     } else {
-      DCHECK_GT(sizeof(void*), pointer_size);
+      DCHECK_GT(kRuntimePointerSize, pointer_size);
       scale = 1;
-      shrink = sizeof(void*) / pointer_size;
+      shrink = static_cast<size_t>(kRuntimePointerSize) / static_cast<size_t>(pointer_size);
     }
     return ThreadOffset<pointer_size>(base + ((tls_ptr_offset * scale) / shrink));
   }
 
  public:
   static uint32_t QuickEntryPointOffsetWithSize(size_t quick_entrypoint_offset,
-                                                size_t pointer_size) {
-    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
-    if (pointer_size == 4) {
-      return QuickEntryPointOffset<4>(quick_entrypoint_offset).Uint32Value();
+                                                PointerSize pointer_size) {
+    if (pointer_size == PointerSize::k32) {
+      return QuickEntryPointOffset<PointerSize::k32>(quick_entrypoint_offset).
+          Uint32Value();
     } else {
-      return QuickEntryPointOffset<8>(quick_entrypoint_offset).Uint32Value();
+      return QuickEntryPointOffset<PointerSize::k64>(quick_entrypoint_offset).
+          Uint32Value();
     }
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> QuickEntryPointOffset(size_t quick_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, quick_entrypoints) + quick_entrypoint_offset);
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> JniEntryPointOffset(size_t jni_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_entrypoints) + jni_entrypoint_offset);
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> SelfOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, self));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> MterpCurrentIBaseOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_current_ibase));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> MterpDefaultIBaseOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_default_ibase));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> MterpAltIBaseOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_alt_ibase));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ExceptionOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, exception));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> PeerOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, opeer));
   }
 
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> CardTableOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, card_table));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadSuspendTriggerOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, suspend_trigger));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalPosOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_pos));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_pos));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalEndOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_end));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_end));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalObjectsOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_objects));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> RosAllocRunsOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 rosalloc_runs));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_alloc_stack_top));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_alloc_stack_end));
@@ -711,19 +718,19 @@
     return tlsPtr_.stack_end == tlsPtr_.stack_begin;
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> StackEndOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, stack_end));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> JniEnvOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_env));
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopOfManagedStackOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
@@ -750,7 +757,7 @@
     return tlsPtr_.managed_stack.PopShadowFrame();
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopShadowFrameOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
@@ -792,7 +799,7 @@
     return handle_scope;
   }
 
-  template<size_t pointer_size>
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopHandleScopeOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 top_handle_scope));
@@ -1061,6 +1068,8 @@
     return tlsPtr_.mterp_alt_ibase;
   }
 
+  // Notify that a signal is being handled. This is to protect us from doing recursive
+  // NPE handling after a SIGSEGV.
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
@@ -1076,8 +1085,12 @@
     return tlsPtr_.nested_signal_state;
   }
 
-  bool IsSuspendedAtSuspendCheck() const {
-    return tls32_.suspended_at_suspend_check;
+  bool IsTransitioningToRunnable() const {
+    return tls32_.is_transitioning_to_runnable;
+  }
+
+  void SetIsTransitioningToRunnable(bool value) {
+    tls32_.is_transitioning_to_runnable = value;
   }
 
   void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1119,7 +1132,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Avoid use, callers should use SetState. Used only by SignalCatcher::HandleSigQuit, ~Thread and
-  // Dbg::Disconnected.
+  // Dbg::ManageDeoptimization.
   ThreadState SetStateUnsafe(ThreadState new_state) {
     ThreadState old_state = GetState();
     if (old_state == kRunnable && new_state != kRunnable) {
@@ -1221,9 +1234,6 @@
 
   static void ThreadExitCallback(void* arg);
 
-  // Maximum number of checkpoint functions.
-  static constexpr uint32_t kMaxCheckpoints = 3;
-
   // Maximum number of suspend barriers.
   static constexpr uint32_t kMaxSuspendBarriers = 3;
 
@@ -1258,7 +1268,7 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+      is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
       debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
       disable_thread_flip_count(0) {
     }
@@ -1300,10 +1310,10 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the thread is suspended in FullSuspendCheck(). This is
-    // used to distinguish runnable threads that are suspended due to
-    // a normal suspend check from other threads.
-    bool32_t suspended_at_suspend_check;
+    // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
+    // non-runnable threads (e.g. kNative, kWaiting) that are about to transition to runnable from
+    // the rest of them.
+    bool32_t is_transitioning_to_runnable;
 
     // True if the thread has been suspended by a debugger event. This is
     // used to invoke method from the debugger which is only allowed when
@@ -1353,8 +1363,8 @@
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
-      last_no_thread_suspension_cause(nullptr), thread_local_objects(0),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
+      thread_local_objects(0), thread_local_pos(nullptr), thread_local_end(nullptr),
       mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
@@ -1453,9 +1463,9 @@
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
-    // Pending checkpoint function or null if non-pending. Installation guarding by
-    // Locks::thread_suspend_count_lock_.
-    Closure* checkpoint_functions[kMaxCheckpoints];
+    // Pending checkpoint function or null if non-pending. If this checkpoint is set and someone
+    // requests another checkpoint, it goes to the checkpoint overflow list.
+    Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);
 
     // Pending barriers that require passing or NULL if non-pending. Installation guarding by
     // Locks::thread_suspend_count_lock_.
@@ -1469,8 +1479,8 @@
     QuickEntryPoints quick_entrypoints;
 
     // Thread-local allocation pointer.
-    size_t thread_local_objects;
     uint8_t* thread_local_start;
+    size_t thread_local_objects;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
@@ -1518,6 +1528,9 @@
   // Debug disable read barrier count, only is checked for debug builds and only in the runtime.
   uint8_t debug_disallow_read_barrier_ = 0;
 
+  // Pending extra checkpoints if checkpoint_function_ is already used.
+  std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
@@ -1579,6 +1592,26 @@
   Thread* const self_;
 };
 
+class ScopedTransitioningToRunnable : public ValueObject {
+ public:
+  explicit ScopedTransitioningToRunnable(Thread* self)
+      : self_(self) {
+    DCHECK_EQ(self, Thread::Current());
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(true);
+    }
+  }
+
+  ~ScopedTransitioningToRunnable() {
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(false);
+    }
+  }
+
+ private:
+  Thread* const self_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
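As a side note, the new ScopedTransitioningToRunnable guard is what lets the thread-flip code resume such threads early. A rough sketch of a call site, assuming a simplified transition helper (hypothetical, not part of this change):

void TransitionToRunnableSketch(Thread* self) {
  // Mark the window in which the flip logic should treat this thread as
  // effectively runnable; only meaningful when kUseReadBarrier is true.
  ScopedTransitioningToRunnable transitioning(self);
  // ... wait out any pending suspension or thread flip, then set the state to
  // kRunnable, as Thread::TransitionFromSuspendedToRunnable() does ...
}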
 
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 9563b99..b922d94 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -44,7 +44,7 @@
 
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   LOG(FATAL) << "Invalid use of alternate signal stack on Android";
 #endif
   stack_t ss;
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 7f070e7..ab1f198 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -35,6 +35,7 @@
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "trace.h"
@@ -405,6 +406,8 @@
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
   CHECK_NE(self->GetState(), kRunnable);
 
+  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.
+
   SuspendAllInternal(self, self, nullptr);
 
   // Run the flip callback for the collector.
@@ -414,26 +417,31 @@
   collector->RegisterPause(NanoTime() - start_time);
 
   // Resume runnable threads.
-  std::vector<Thread*> runnable_threads;
+  size_t runnable_thread_count = 0;
   std::vector<Thread*> other_threads;
   {
+    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     --suspend_all_count_;
     for (const auto& thread : list_) {
+      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
+      // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
+      // a runnable thread runs it for itself or we run it for a suspended thread below.
+      thread->SetFlipFunction(thread_flip_visitor);
       if (thread == self) {
         continue;
       }
-      // Set the flip function for both runnable and suspended threads
-      // because Thread::DumpState/DumpJavaStack() (invoked by a
-      // checkpoint) may cause the flip function to be run for a
-      // runnable/suspended thread before a runnable threads runs it
-      // for itself or we run it for a suspended thread below.
-      thread->SetFlipFunction(thread_flip_visitor);
-      if (thread->IsSuspendedAtSuspendCheck()) {
+      // Resume early the threads that were runnable but are suspended just for this thread flip or
+      // about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function) to
+      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
+      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip).
+      ThreadState state = thread->GetState();
+      if (state == kWaitingForGcThreadFlip ||
+          thread->IsTransitioningToRunnable()) {
         // The thread will resume right after the broadcast.
         thread->ModifySuspendCount(self, -1, nullptr, false);
-        runnable_threads.push_back(thread);
+        ++runnable_thread_count;
       } else {
         other_threads.push_back(thread);
       }
@@ -441,8 +449,11 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
+  collector->GetHeap()->ThreadFlipEnd(self);
+
   // Run the closure on the other threads and let them resume.
   {
+    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     for (const auto& thread : other_threads) {
       Closure* flip_func = thread->GetFlipFunction();
@@ -451,11 +462,15 @@
       }
     }
     // Run it for self.
-    thread_flip_visitor->Run(self);
+    Closure* flip_func = self->GetFlipFunction();
+    if (flip_func != nullptr) {
+      flip_func->Run(self);
+    }
   }
 
   // Resume other threads.
   {
+    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
       thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -463,7 +478,7 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
-  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
+  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
 }
 
 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
@@ -614,11 +629,7 @@
             PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
           }
         }
-      } else {
-        cur_val = pending_threads.LoadRelaxed();
-        CHECK_EQ(cur_val, 0);
-        break;
-      }
+      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
 #else
       // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
 #endif
@@ -1147,9 +1158,10 @@
 void ThreadList::SuspendAllDaemonThreadsForShutdown() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::thread_list_lock_);
   size_t daemons_left = 0;
-  {  // Tell all the daemons it's time to suspend.
+  {
+    // Tell all the daemons it's time to suspend.
+    MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : list_) {
       // This is only run after all non-daemon threads have exited, so the remainder should all be
@@ -1178,13 +1190,16 @@
   static constexpr size_t kSleepMicroseconds = 1000;
   for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
     bool all_suspended = true;
-    for (const auto& thread : list_) {
-      if (thread != self && thread->GetState() == kRunnable) {
-        if (!have_complained) {
-          LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
-          have_complained = true;
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      for (const auto& thread : list_) {
+        if (thread != self && thread->GetState() == kRunnable) {
+          if (!have_complained) {
+            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
+            have_complained = true;
+          }
+          all_suspended = false;
         }
-        all_suspended = false;
       }
     }
     if (all_suspended) {
@@ -1283,7 +1298,7 @@
 
   // Clear the TLS data, so that the underlying native thread is recognizably detached.
   // (It may wish to reattach later.)
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
@@ -1301,6 +1316,39 @@
   }
 }
 
+void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
+  Thread* const self = Thread::Current();
+  std::vector<Thread*> threads_to_visit;
+
+  // Tell threads to suspend and copy them into list.
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : list_) {
+      thread->ModifySuspendCount(self, +1, nullptr, false);
+      if (thread == self || thread->IsSuspended()) {
+        threads_to_visit.push_back(thread);
+      } else {
+        thread->ModifySuspendCount(self, -1, nullptr, false);
+      }
+    }
+  }
+
+  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
+  // order violations.
+  for (Thread* thread : threads_to_visit) {
+    thread->VisitRoots(visitor);
+  }
+
+  // Restore suspend counts.
+  {
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : threads_to_visit) {
+      thread->ModifySuspendCount(self, -1, nullptr, false);
+    }
+  }
+}
+
 void ThreadList::VisitRoots(RootVisitor* visitor) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index df81ad1..49f65e1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -144,6 +144,10 @@
   void VisitRoots(RootVisitor* visitor) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void VisitRootsForSuspendedThreads(RootVisitor* visitor)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Return a copy of the thread list.
   std::list<Thread*> GetList() REQUIRES(Locks::thread_list_lock_) {
     return list_;
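A sketch of how a caller (for example, a concurrent collector phase) might use the new helper; the RootVisitor argument and the GetThreadList() accessor are assumed from the surrounding runtime code:

void VisitSuspendedRootsSketch(RootVisitor* visitor) {
  // Only threads that are already suspended (plus the calling thread) are
  // visited; threads that are still runnable are skipped, not force-suspended.
  Runtime::Current()->GetThreadList()->VisitRootsForSuspendedThreads(visitor);
}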
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 2fba805..b14f340 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -61,7 +61,7 @@
 void ThreadPoolWorker::SetPthreadPriority(int priority) {
   CHECK_GE(priority, PRIO_MIN);
   CHECK_LE(priority, PRIO_MAX);
-#if defined(__ANDROID__)
+#if defined(ART_TARGET_ANDROID)
   int result = setpriority(PRIO_PROCESS, pthread_gettid_np(pthread_), priority);
   if (result != 0) {
     PLOG(ERROR) << "Failed to setpriority to :" << priority;
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
new file mode 100644
index 0000000..7c0ea64
--- /dev/null
+++ b/runtime/ti/agent.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "agent.h"
+#include "java_vm_ext.h"
+#include "runtime.h"
+
+namespace art {
+namespace ti {
+
+const char* AGENT_ON_LOAD_FUNCTION_NAME = "Agent_OnLoad";
+const char* AGENT_ON_ATTACH_FUNCTION_NAME = "Agent_OnAttach";
+const char* AGENT_ON_UNLOAD_FUNCTION_NAME = "Agent_OnUnload";
+
+Agent Agent::Create(std::string arg) {
+  size_t eq = arg.find_first_of('=');
+  if (eq == std::string::npos) {
+    return Agent(arg, "");
+  } else {
+    return Agent(arg.substr(0, eq), arg.substr(eq + 1, arg.length()));
+  }
+}
+
+// TODO We need to acquire some locks probably.
+Agent::LoadError Agent::Load(/*out*/jint* call_res, /*out*/ std::string* error_msg) {
+  DCHECK(call_res != nullptr);
+  DCHECK(error_msg != nullptr);
+
+  if (IsStarted()) {
+    *error_msg = StringPrintf("the agent at %s has already been started!", name_.c_str());
+    VLOG(agents) << "err: " << *error_msg;
+    return kAlreadyStarted;
+  }
+  LoadError err = DoDlOpen(error_msg);
+  if (err != kNoError) {
+    VLOG(agents) << "err: " << *error_msg;
+    return err;
+  }
+  if (onload_ == nullptr) {
+    *error_msg = StringPrintf("Unable to start agent %s: No Agent_OnLoad function found",
+                              name_.c_str());
+    VLOG(agents) << "err: " << *error_msg;
+    return kLoadingError;
+  }
+  // Need to let the function fiddle with the array.
+  std::unique_ptr<char[]> copied_args(new char[args_.size() + 1]);
+  strcpy(copied_args.get(), args_.c_str());
+  // TODO Need to do some checks that we are at a good spot etc.
+  *call_res = onload_(static_cast<JavaVM*>(Runtime::Current()->GetJavaVM()),
+                      copied_args.get(),
+                      nullptr);
+  if (*call_res != 0) {
+    *error_msg = StringPrintf("Initialization of %s returned non-zero value of %d",
+                              name_.c_str(), *call_res);
+    VLOG(agents) << "err: " << *error_msg;
+    return kInitializationError;
+  } else {
+    return kNoError;
+  }
+}
+
+Agent::LoadError Agent::DoDlOpen(/*out*/std::string* error_msg) {
+  DCHECK(error_msg != nullptr);
+  dlopen_handle_ = dlopen(name_.c_str(), RTLD_LAZY);
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Unable to dlopen %s: %s", name_.c_str(), dlerror());
+    return kLoadingError;
+  }
+
+  onload_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_,
+                                                        AGENT_ON_LOAD_FUNCTION_NAME));
+  if (onload_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnLoad' symbol in " << this;
+  }
+  onattach_ = reinterpret_cast<AgentOnAttachFunction>(dlsym(dlopen_handle_,
+                                                            AGENT_ON_ATTACH_FUNCTION_NAME));
+  if (onattach_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnAttach' symbol in " << this;
+  }
+  onunload_ = reinterpret_cast<AgentOnUnloadFunction>(dlsym(dlopen_handle_,
+                                                            AGENT_ON_UNLOAD_FUNCTION_NAME));
+  if (onunload_ == nullptr) {
+    VLOG(agents) << "Unable to find 'Agent_OnUnload' symbol in " << this;
+  }
+  return kNoError;
+}
+
+// TODO Lock some stuff probably.
+void Agent::Unload() {
+  if (dlopen_handle_ != nullptr) {
+    if (onunload_ != nullptr) {
+      onunload_(Runtime::Current()->GetJavaVM());
+    }
+    dlclose(dlopen_handle_);
+    dlopen_handle_ = nullptr;
+  } else {
+    VLOG(agents) << this << " is not currently loaded!";
+  }
+}
+
+Agent::Agent(const Agent& other)
+  : name_(other.name_),
+    args_(other.args_),
+    dlopen_handle_(other.dlopen_handle_),
+    onload_(other.onload_),
+    onattach_(other.onattach_),
+    onunload_(other.onunload_) {
+  if (other.dlopen_handle_ != nullptr) {
+    dlopen(other.name_.c_str(), 0);
+  }
+}
+
+Agent::~Agent() {
+  if (dlopen_handle_ != nullptr) {
+    dlclose(dlopen_handle_);
+  }
+}
+
+std::ostream& operator<<(std::ostream &os, const Agent* m) {
+  return os << *m;
+}
+
+std::ostream& operator<<(std::ostream &os, Agent const& m) {
+  return os << "Agent { name=\"" << m.name_ << "\", args=\"" << m.args_ << "\", handle="
+            << m.dlopen_handle_ << " }";
+}
+
+}  // namespace ti
+}  // namespace art
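For reference, the symbols looked up by DoDlOpen() are expected to be exported by the agent shared library itself. A minimal, hypothetical agent could look like this (signatures follow the typedefs in agent.h):

#include <jni.h>

// Invoked by Agent::Load() with the part of the agent argument after '=';
// returning non-zero is reported back as kInitializationError.
extern "C" jint Agent_OnLoad(JavaVM* vm, const char* options, void* reserved) {
  return 0;
}

// Invoked by Agent::Unload() before the library is dlclose()d.
extern "C" void Agent_OnUnload(JavaVM* vm) {
}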
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
new file mode 100644
index 0000000..521e21e
--- /dev/null
+++ b/runtime/ti/agent.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_TI_AGENT_H_
+#define ART_RUNTIME_TI_AGENT_H_
+
+#include <dlfcn.h>
+#include <jni.h>  // for jint, JavaVM* etc declarations
+
+#include "base/stringprintf.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+namespace ti {
+
+using AgentOnLoadFunction = jint (*)(JavaVM*, const char*, void*);
+using AgentOnAttachFunction = jint (*)(JavaVM*, const char*, void*);
+using AgentOnUnloadFunction = void (*)(JavaVM*);
+
+class Agent {
+ public:
+  enum LoadError {
+    kNoError,              // No error occurred.
+    kAlreadyStarted,       // The agent has already been loaded.
+    kLoadingError,         // dlopen or dlsym returned an error.
+    kInitializationError,  // The entrypoint did not return 0. This might require an abort.
+  };
+
+  bool IsStarted() const {
+    return dlopen_handle_ != nullptr;
+  }
+
+  const std::string& GetName() const {
+    return name_;
+  }
+
+  const std::string& GetArgs() const {
+    return args_;
+  }
+
+  bool HasArgs() const {
+    return !GetArgs().empty();
+  }
+
+  // TODO We need to acquire some locks probably.
+  LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg);
+
+  // TODO We need to acquire some locks probably.
+  void Unload();
+
+  // Tries to attach the agent using its OnAttach method. Returns kNoError on success.
+  // TODO We need to acquire some locks probably.
+  LoadError Attach(std::string* error_msg) {
+    // TODO
+    *error_msg = "Attach has not yet been implemented!";
+    return kLoadingError;
+  }
+
+  static Agent Create(std::string arg);
+
+  static Agent Create(std::string name, std::string args) {
+    return Agent(name, args);
+  }
+
+  ~Agent();
+
+  // We need a move constructor and a copy constructor for vectors.
+  Agent(const Agent& other);
+
+  Agent(Agent&& other)
+      : name_(other.name_),
+        args_(other.args_),
+        dlopen_handle_(nullptr),
+        onload_(nullptr),
+        onattach_(nullptr),
+        onunload_(nullptr) {
+    other.dlopen_handle_ = nullptr;
+    other.onload_ = nullptr;
+    other.onattach_ = nullptr;
+    other.onunload_ = nullptr;
+  }
+
+  // We don't need an operator=
+  void operator=(const Agent&) = delete;
+
+ private:
+  Agent(std::string name, std::string args)
+      : name_(name),
+        args_(args),
+        dlopen_handle_(nullptr),
+        onload_(nullptr),
+        onattach_(nullptr),
+        onunload_(nullptr) { }
+
+  LoadError DoDlOpen(/*out*/std::string* error_msg);
+
+  const std::string name_;
+  const std::string args_;
+  void* dlopen_handle_;
+
+  // The entrypoints.
+  AgentOnLoadFunction onload_;
+  AgentOnAttachFunction onattach_;
+  AgentOnUnloadFunction onunload_;
+
+  friend std::ostream& operator<<(std::ostream &os, Agent const& m);
+};
+
+std::ostream& operator<<(std::ostream &os, Agent const& m);
+std::ostream& operator<<(std::ostream &os, const Agent* m);
+
+}  // namespace ti
+}  // namespace art
+
+#endif  // ART_RUNTIME_TI_AGENT_H_
+
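A hedged sketch of how the runtime side might drive this API for an agent option string (the library name and arguments below are made up):

void LoadAgentSketch() {
  // "libtiagent.so=arg1,arg2" is split into name and args at the first '='.
  art::ti::Agent agent = art::ti::Agent::Create("libtiagent.so=arg1,arg2");
  jint call_res = 0;
  std::string error_msg;
  if (agent.Load(&call_res, &error_msg) != art::ti::Agent::kNoError) {
    LOG(ERROR) << "Agent failed to load: " << error_msg;
  }
}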
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b879355..56a26de 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -21,6 +21,7 @@
 
 #include "art_method-inl.h"
 #include "base/casts.h"
+#include "base/enums.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -645,31 +646,11 @@
   }
 }
 
-static void GetVisitedMethodsFromBitSets(
-    const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
-    std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* const self = Thread::Current();
-  for (auto& e : seen_methods) {
-    DexIndexBitSet* bit_set = e.second;
-    // TODO: Visit trace methods as roots.
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
-    for (uint32_t i = 0; i < bit_set->size(); ++i) {
-      if ((*bit_set)[i]) {
-        visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
-      }
-    }
-  }
-}
-
 void Trace::FinishTracing() {
   size_t final_offset = 0;
 
   std::set<ArtMethod*> visited_methods;
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Write the secondary file with all the method names.
-    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
-
     // Clean up.
     STLDeleteValues(&seen_methods_);
   } else {
@@ -715,8 +696,8 @@
   std::string header(os.str());
 
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    File file;
-    if (!file.Open(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY)) {
+    File file(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY, true);
+    if (!file.IsOpened()) {
       LOG(WARNING) << "Could not open secondary trace file!";
       return;
     }
@@ -850,11 +831,6 @@
 bool Trace::RegisterMethod(ArtMethod* method) {
   mirror::DexCache* dex_cache = method->GetDexCache();
   const DexFile* dex_file = dex_cache->GetDexFile();
-  auto* resolved_method = dex_cache->GetResolvedMethod(method->GetDexMethodIndex(), sizeof(void*));
-  if (resolved_method != method) {
-    DCHECK(resolved_method == nullptr);
-    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method, sizeof(void*));
-  }
   if (seen_methods_.find(dex_file) == seen_methods_.end()) {
     seen_methods_.insert(std::make_pair(dex_file, new DexIndexBitSet()));
   }
@@ -869,7 +845,7 @@
 bool Trace::RegisterThread(Thread* thread) {
   pid_t tid = thread->GetTid();
   CHECK_LT(0U, static_cast<uint32_t>(tid));
-  CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+  CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
 
   if (!(*seen_threads_)[tid]) {
     seen_threads_->set(tid);
@@ -879,9 +855,8 @@
 }
 
 std::string Trace::GetMethodLine(ArtMethod* method) {
-  method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-  return StringPrintf("%p\t%s\t%s\t%s\t%s\n",
-                      reinterpret_cast<void*>((EncodeTraceMethod(method) << TraceActionBits)),
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (EncodeTraceMethod(method) << TraceActionBits),
       PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
       method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
 }
diff --git a/runtime/trace.h b/runtime/trace.h
index 80f1a4c..9b29fb9 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -41,7 +41,9 @@
 class Thread;
 
 using DexIndexBitSet = std::bitset<65536>;
-using ThreadIDBitSet = std::bitset<65536>;
+
+constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U;
+using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;
 
 enum TracingMode {
   kTracingInactive,
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 6a50b8e..c5b4445 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -44,22 +44,15 @@
 #if defined(__APPLE__)
 #include "AvailabilityMacros.h"  // For MAC_OS_X_VERSION_MAX_ALLOWED
 #include <sys/syscall.h>
+#include <crt_externs.h>
 #endif
 
-// For DumpNativeStack.
-#include <backtrace/Backtrace.h>
-#include <backtrace/BacktraceMap.h>
-
 #if defined(__linux__)
 #include <linux/unistd.h>
 #endif
 
 namespace art {
 
-#if defined(__linux__)
-static constexpr bool kUseAddr2line = !kIsTargetBuild;
-#endif
-
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
@@ -136,8 +129,8 @@
 }
 
 bool ReadFileToString(const std::string& file_name, std::string* result) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
@@ -155,8 +148,8 @@
 }
 
 bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
@@ -1026,210 +1019,6 @@
   return "";
 }
 
-#if defined(__linux__)
-
-ALWAYS_INLINE
-static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
-  if (prefix != nullptr) {
-    *os << prefix;
-  }
-  *os << "  ";
-  if (!odd) {
-    *os << " ";
-  }
-}
-
-static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
-  FILE* stream = popen(cmd.c_str(), "r");
-  if (stream) {
-    if (os != nullptr) {
-      bool odd_line = true;               // We indent them differently.
-      bool wrote_prefix = false;          // Have we already written a prefix?
-      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
-                                          // alt stack, but just to be sure...
-      char buffer[kMaxBuffer];
-      while (!feof(stream)) {
-        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
-          // Split on newlines.
-          char* tmp = buffer;
-          for (;;) {
-            char* new_line = strchr(tmp, '\n');
-            if (new_line == nullptr) {
-              // Print the rest.
-              if (*tmp != 0) {
-                if (!wrote_prefix) {
-                  WritePrefix(os, prefix, odd_line);
-                }
-                wrote_prefix = true;
-                *os << tmp;
-              }
-              break;
-            }
-            if (!wrote_prefix) {
-              WritePrefix(os, prefix, odd_line);
-            }
-            char saved = *(new_line + 1);
-            *(new_line + 1) = 0;
-            *os << tmp;
-            *(new_line + 1) = saved;
-            tmp = new_line + 1;
-            odd_line = !odd_line;
-            wrote_prefix = false;
-          }
-        }
-      }
-    }
-    pclose(stream);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
-                      const char* prefix) {
-  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
-                                   map_src.c_str(), offset));
-  RunCommand(cmdline.c_str(), &os, prefix);
-}
-
-static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
-  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
-      method->GetEntryPointFromQuickCompiledCode()));
-  if (code == 0) {
-    return pc == 0;
-  }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-  return code <= pc && pc <= (code + code_size);
-}
-#endif
-
-void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
-    ArtMethod* current_method, void* ucontext_ptr) {
-#if __linux__
-  // b/18119146
-  if (RUNNING_ON_MEMORY_TOOL != 0) {
-    return;
-  }
-
-  BacktraceMap* map = existing_map;
-  std::unique_ptr<BacktraceMap> tmp_map;
-  if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(getpid()));
-    map = tmp_map.get();
-  }
-  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
-  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
-    os << prefix << "(backtrace::Unwind failed for thread " << tid
-       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
-    return;
-  } else if (backtrace->NumFrames() == 0) {
-    os << prefix << "(no native stack frames for thread " << tid << ")\n";
-    return;
-  }
-
-  // Check whether we have and should use addr2line.
-  bool use_addr2line;
-  if (kUseAddr2line) {
-    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
-    // and print to stderr.
-    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
-  } else {
-    use_addr2line = false;
-  }
-
-  for (Backtrace::const_iterator it = backtrace->begin();
-       it != backtrace->end(); ++it) {
-    // We produce output like this:
-    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
-    // In order for parsing tools to continue to function, the stack dump
-    // format must at least adhere to this format:
-    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
-    // The parsers require a single space before and after pc, and two spaces
-    // after the <RELATIVE_ADDR>. There can be any prefix data before the
-    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
-    os << prefix << StringPrintf("#%02zu pc ", it->num);
-    bool try_addr2line = false;
-    if (!BacktraceMap::IsValid(it->map)) {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
-                                                            : "%08" PRIxPTR "  ???",
-                         it->pc);
-    } else {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
-                                                            : "%08" PRIxPTR "  ",
-                         BacktraceMap::GetRelativePc(it->map, it->pc));
-      os << it->map.name;
-      os << " (";
-      if (!it->func_name.empty()) {
-        os << it->func_name;
-        if (it->func_offset != 0) {
-          os << "+" << it->func_offset;
-        }
-        try_addr2line = true;
-      } else if (current_method != nullptr &&
-          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          PcIsWithinQuickCode(current_method, it->pc)) {
-        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
-        os << JniLongName(current_method) << "+"
-           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
-      } else {
-        os << "???";
-      }
-      os << ")";
-    }
-    os << "\n";
-    if (try_addr2line && use_addr2line) {
-      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
-    }
-  }
-#else
-  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
-#endif
-}
-
-#if defined(__APPLE__)
-
-// TODO: is there any way to get the kernel stack on Mac OS?
-void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
-
-#else
-
-void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
-  if (tid == GetTid()) {
-    // There's no point showing that we're reading our stack out of /proc!
-    return;
-  }
-
-  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
-  std::string kernel_stack;
-  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
-    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
-    return;
-  }
-
-  std::vector<std::string> kernel_stack_frames;
-  Split(kernel_stack, '\n', &kernel_stack_frames);
-  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
-  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
-  kernel_stack_frames.pop_back();
-  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
-    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
-    // into "futex_wait_queue_me+0xcd/0x110".
-    const char* text = kernel_stack_frames[i].c_str();
-    const char* close_bracket = strchr(text, ']');
-    if (close_bracket != nullptr) {
-      text = close_bracket + 2;
-    }
-    os << prefix;
-    if (include_count) {
-      os << StringPrintf("#%02zd ", i);
-    }
-    os << text << "\n";
-  }
-}
-
-#endif
-
 const char* GetAndroidRoot() {
   const char* android_root = getenv("ANDROID_ROOT");
   if (android_root == nullptr) {
@@ -1299,58 +1088,18 @@
   }
 }
 
-static std::string GetDalvikCacheImpl(const char* subdir,
-                                      const bool create_if_absent,
-                                      const bool abort_on_error) {
+std::string GetDalvikCache(const char* subdir) {
   CHECK(subdir != nullptr);
   const char* android_data = GetAndroidData();
   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (!OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (!create_if_absent) {
-      // TODO: Check callers. Traditional behavior is to not to abort, even when abort_on_error.
-      return "";
-    }
-
-    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
-    if (strcmp(android_data, "/data") == 0) {
-      if (abort_on_error) {
-        LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
-                   << ", cannot create /data dalvik-cache.";
-        UNREACHABLE();
-      }
-      return "";
-    }
-
-    int result = mkdir(dalvik_cache_root.c_str(), 0700);
-    if (result != 0 && errno != EEXIST) {
-      if (abort_on_error) {
-        PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
-        UNREACHABLE();
-      }
-      return "";
-    }
-
-    result = mkdir(dalvik_cache.c_str(), 0700);
-    if (result != 0) {
-      if (abort_on_error) {
-        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
-        UNREACHABLE();
-      }
-      return "";
-    }
+    // TODO: Check callers. Traditional behavior is to not abort.
+    return "";
   }
   return dalvik_cache;
 }
 
-std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
-  return GetDalvikCacheImpl(subdir, create_if_absent, false);
-}
-
-std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
-  return GetDalvikCacheImpl(subdir, create_if_absent, true);
-}
-
 bool GetDalvikCacheFilename(const char* location, const char* cache_location,
                             std::string* filename, std::string* error_msg) {
   if (location[0] != '/') {
@@ -1367,15 +1116,6 @@
   return true;
 }
 
-std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
-  std::string ret;
-  std::string error_msg;
-  if (!GetDalvikCacheFilename(location, cache_location, &ret, &error_msg)) {
-    LOG(FATAL) << error_msg;
-  }
-  return ret;
-}
-
 static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
   // in = /foo/bar/baz
   // out = /foo/bar/<isa>/baz
diff --git a/runtime/utils.h b/runtime/utils.h
index c1e88a4..84079e2 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -242,21 +242,6 @@
 // implementation-defined limit.
 void SetThreadName(const char* thread_name);
 
-// Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os,
-                     pid_t tid,
-                     BacktraceMap* map = nullptr,
-                     const char* prefix = "",
-                     ArtMethod* current_method = nullptr,
-                     void* ucontext = nullptr)
-    NO_THREAD_SAFETY_ANALYSIS;
-
-// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
-void DumpKernelStack(std::ostream& os,
-                     pid_t tid,
-                     const char* prefix = "",
-                     bool include_count = true);
-
 // Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
 
@@ -266,11 +251,8 @@
 const char* GetAndroidDataSafe(std::string* error_msg);
 
 // Returns the dalvik-cache location, with subdir appended. Returns the empty string if the cache
-// could not be found (or created).
-std::string GetDalvikCache(const char* subdir, bool create_if_absent = true);
-// Returns the dalvik-cache location, or dies trying. subdir will be
-// appended to the cache location.
-std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
+// could not be found.
+std::string GetDalvikCache(const char* subdir);
 // Return true if we found the dalvik cache and stored it in the dalvik_cache argument.
 // have_android_data will be set to true if we have an ANDROID_DATA that exists,
 // dalvik_cache_exists will be true if there is a dalvik-cache directory that is present.
@@ -282,10 +264,6 @@
 // rooted at cache_location.
 bool GetDalvikCacheFilename(const char* file_location, const char* cache_location,
                             std::string* filename, std::string* error_msg);
-// Returns the absolute dalvik-cache path for a DexFile or OatFile, or
-// dies trying. The path returned will be rooted at cache_location.
-std::string GetDalvikCacheFilenameOrDie(const char* file_location,
-                                        const char* cache_location);
 
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
@@ -382,13 +360,19 @@
 #endif
 
 template <typename T>
-T GetRandomNumber(T min, T max) {
+static T GetRandomNumber(T min, T max) {
   CHECK_LT(min, max);
   std::uniform_int_distribution<T> dist(min, max);
   RNG rng;
   return dist(rng);
 }
 
+// Adds all of the elements from one container to another.
+template <typename Dest, typename Src>
+static void AddAll(Dest& dest, const Src& src) {
+  dest.insert(src.begin(), src.end());
+}
+
 // Return the file size in bytes or -1 if the file does not exists.
 int64_t GetFileSizeBytes(const std::string& filename);
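To illustrate the revised contract (GetDalvikCache() no longer creates missing directories) and the new AddAll() helper, a small hypothetical snippet:

void DalvikCacheSketch() {
  // Returns "" when the dalvik-cache subdirectory does not exist; creating it
  // is now the caller's responsibility.
  std::string cache_dir = GetDalvikCache("arm64");
  std::string cache_file;
  std::string error_msg;
  if (!cache_dir.empty() &&
      GetDalvikCacheFilename("/system/framework/boot.art", cache_dir.c_str(),
                             &cache_file, &error_msg)) {
    // cache_file is e.g. <cache_dir>/system@framework@boot.art.
  }

  // AddAll() bulk-inserts one container's elements into another.
  std::set<int> dest;
  std::vector<int> src = {1, 2, 3};
  AddAll(dest, src);
}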
 
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 6922564..4c63156 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -23,11 +23,12 @@
 #include "base/logging.h"
 #include "gc_root.h"
 #include "globals.h"
+#include "mirror/dex_cache.h"
 #include "primitive.h"
 
 namespace art {
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size,
+inline DexCacheArraysLayout::DexCacheArraysLayout(PointerSize pointer_size,
                                                   const DexFile::Header& header)
     : pointer_size_(pointer_size),
       /* types_offset_ is always 0u, so it's constexpr */
@@ -39,30 +40,33 @@
           RoundUp(strings_offset_ + StringsSize(header.string_ids_size_), FieldsAlignment())),
       size_(
           RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), Alignment())) {
-  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
 }
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file)
+inline DexCacheArraysLayout::DexCacheArraysLayout(PointerSize pointer_size, const DexFile* dex_file)
     : DexCacheArraysLayout(pointer_size, dex_file->GetHeader()) {
 }
 
-inline size_t DexCacheArraysLayout::Alignment() const {
+inline constexpr size_t DexCacheArraysLayout::Alignment() {
   // GcRoot<> alignment is 4, i.e. lower than or equal to the pointer alignment.
   static_assert(alignof(GcRoot<mirror::Class>) == 4, "Expecting alignof(GcRoot<>) == 4");
-  static_assert(alignof(GcRoot<mirror::String>) == 4, "Expecting alignof(GcRoot<>) == 4");
-  DCHECK(pointer_size_ == 4u || pointer_size_ == 8u);
-  // Pointer alignment is the same as pointer size.
-  return pointer_size_;
+  static_assert(alignof(mirror::StringDexCacheType) == 8,
+                "Expecting alignof(StringDexCacheType) == 8");
+  return alignof(mirror::StringDexCacheType);
+}
+
+template <typename T>
+static constexpr PointerSize GcRootAsPointerSize() {
+  return ConvertToPointerSize(sizeof(GcRoot<T>));
 }
 
 inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const {
-  return types_offset_ + ElementOffset(sizeof(GcRoot<mirror::Class>), type_idx);
+  return types_offset_ + ElementOffset(GcRootAsPointerSize<mirror::Class>(), type_idx);
 }
 
 inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
   // App image patching relies on having enough room for a forwarding pointer in the types array.
   // See FixupArtMethodArrayVisitor and ClassLinker::AddImageSpace.
-  return std::max(ArraySize(sizeof(GcRoot<mirror::Class>), num_elements), pointer_size_);
+  return std::max(ArraySize(GcRootAsPointerSize<mirror::Class>(), num_elements),
+                  static_cast<size_t>(pointer_size_));
 }
 
 inline size_t DexCacheArraysLayout::TypesAlignment() const {
@@ -75,23 +79,30 @@
 
 inline size_t DexCacheArraysLayout::MethodsSize(size_t num_elements) const {
   // App image patching relies on having enough room for a forwarding pointer in the methods array.
-  return std::max(ArraySize(pointer_size_, num_elements), pointer_size_);
+  return std::max(ArraySize(pointer_size_, num_elements), static_cast<size_t>(pointer_size_));
 }
 
 inline size_t DexCacheArraysLayout::MethodsAlignment() const {
-  return pointer_size_;
+  return static_cast<size_t>(pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
-  return strings_offset_ + ElementOffset(sizeof(GcRoot<mirror::String>), string_idx);
+  return strings_offset_ + ElementOffset(PointerSize::k64,
+                                         string_idx % mirror::DexCache::kDexCacheStringCacheSize);
 }
 
 inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
-  return ArraySize(sizeof(GcRoot<mirror::String>), num_elements);
+  size_t cache_size = mirror::DexCache::kDexCacheStringCacheSize;
+  if (num_elements < cache_size) {
+    cache_size = num_elements;
+  }
+  return ArraySize(PointerSize::k64, cache_size);
 }
 
 inline size_t DexCacheArraysLayout::StringsAlignment() const {
-  return alignof(GcRoot<mirror::String>);
+  static_assert(alignof(mirror::StringDexCacheType) == 8,
+                "Expecting alignof(StringDexCacheType) == 8");
+  return alignof(mirror::StringDexCacheType);
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
@@ -103,15 +114,15 @@
 }
 
 inline size_t DexCacheArraysLayout::FieldsAlignment() const {
-  return pointer_size_;
+  return static_cast<size_t>(pointer_size_);
 }
 
-inline size_t DexCacheArraysLayout::ElementOffset(size_t element_size, uint32_t idx) {
-  return element_size * idx;
+inline size_t DexCacheArraysLayout::ElementOffset(PointerSize element_size, uint32_t idx) {
+  return static_cast<size_t>(element_size) * idx;
 }
 
-inline size_t DexCacheArraysLayout::ArraySize(size_t element_size, uint32_t num_elements) {
-  return element_size * num_elements;
+inline size_t DexCacheArraysLayout::ArraySize(PointerSize element_size, uint32_t num_elements) {
+  return static_cast<size_t>(element_size) * num_elements;
 }
 
 }  // namespace art
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index cd84460..20ffa90 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -31,7 +31,7 @@
   // Construct an invalid layout.
   DexCacheArraysLayout()
       : /* types_offset_ is always 0u */
-        pointer_size_(0u),
+        pointer_size_(kRuntimePointerSize),
         methods_offset_(0u),
         strings_offset_(0u),
         fields_offset_(0u),
@@ -39,10 +39,10 @@
   }
 
   // Construct a layout for a particular dex file header.
-  DexCacheArraysLayout(size_t pointer_size, const DexFile::Header& header);
+  DexCacheArraysLayout(PointerSize pointer_size, const DexFile::Header& header);
 
   // Construct a layout for a particular dex file.
-  DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file);
+  DexCacheArraysLayout(PointerSize pointer_size, const DexFile* dex_file);
 
   bool Valid() const {
     return Size() != 0u;
@@ -52,7 +52,7 @@
     return size_;
   }
 
-  size_t Alignment() const;
+  static constexpr size_t Alignment();
 
   size_t TypesOffset() const {
     return types_offset_;
@@ -96,17 +96,17 @@
 
  private:
   static constexpr size_t types_offset_ = 0u;
-  const size_t pointer_size_;  // Must be first for construction initialization order.
+  const PointerSize pointer_size_;  // Must be first for construction initialization order.
   const size_t methods_offset_;
   const size_t strings_offset_;
   const size_t fields_offset_;
   const size_t size_;
 
-  static size_t Alignment(size_t pointer_size);
+  static size_t Alignment(PointerSize pointer_size);
 
-  static size_t ElementOffset(size_t element_size, uint32_t idx);
+  static size_t ElementOffset(PointerSize element_size, uint32_t idx);
 
-  static size_t ArraySize(size_t element_size, uint32_t num_elements);
+  static size_t ArraySize(PointerSize element_size, uint32_t num_elements);
 };
 
 }  // namespace art
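With the constructor now taking a PointerSize, building and querying a layout looks roughly like this (sketch; dex_file stands for an already-opened DexFile):

void DexCacheLayoutSketch(const DexFile* dex_file) {
  DexCacheArraysLayout layout(kRuntimePointerSize, dex_file);
  if (layout.Valid()) {
    size_t total_bytes = layout.Size();                     // Combined size of all arrays.
    size_t alignment = DexCacheArraysLayout::Alignment();   // Now a compile-time constant (8).
    size_t types_start = layout.TypesOffset();              // Always 0.
    static_cast<void>(total_bytes + alignment + types_start);
  }
}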
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index f00edff..0a01cdb 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -16,6 +16,7 @@
 
 #include "utils.h"
 
+#include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "mirror/array.h"
@@ -187,17 +188,17 @@
   ASSERT_TRUE(c != nullptr);
   ArtMethod* m;
 
-  m = c->FindVirtualMethod("charAt", "(I)C", sizeof(void*));
+  m = c->FindVirtualMethod("charAt", "(I)C", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_charAt", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_charAt__I", JniLongName(m));
 
-  m = c->FindVirtualMethod("indexOf", "(Ljava/lang/String;I)I", sizeof(void*));
+  m = c->FindVirtualMethod("indexOf", "(Ljava/lang/String;I)I", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_indexOf", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_indexOf__Ljava_lang_String_2I", JniLongName(m));
 
-  m = c->FindDirectMethod("copyValueOf", "([CII)Ljava/lang/String;", sizeof(void*));
+  m = c->FindDirectMethod("copyValueOf", "([CII)Ljava/lang/String;", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
   EXPECT_EQ("Java_java_lang_String_copyValueOf", JniShortName(m));
   EXPECT_EQ("Java_java_lang_String_copyValueOf___3CII", JniLongName(m));
@@ -321,26 +322,30 @@
   EXPECT_FALSE(EndsWith("oo", "foo"));
 }
 
-TEST_F(UtilsTest, GetDalvikCacheFilenameOrDie) {
-  EXPECT_STREQ("/foo/system@app@Foo.apk@classes.dex",
-               GetDalvikCacheFilenameOrDie("/system/app/Foo.apk", "/foo").c_str());
+TEST_F(UtilsTest, GetDalvikCacheFilename) {
+  std::string name;
+  std::string error;
 
-  EXPECT_STREQ("/foo/data@app@foo-1.apk@classes.dex",
-               GetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@core.jar@classes.dex",
-               GetDalvikCacheFilenameOrDie("/system/framework/core.jar", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@boot.art",
-               GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str());
-  EXPECT_STREQ("/foo/system@framework@boot.oat",
-               GetDalvikCacheFilenameOrDie("/system/framework/boot.oat", "/foo").c_str());
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/app/Foo.apk", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@app@Foo.apk@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/data/app/foo-1.apk", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/data@app@foo-1.apk@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/core.jar", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@core.jar@classes.dex", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/boot.art", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@boot.art", name);
+
+  EXPECT_TRUE(GetDalvikCacheFilename("/system/framework/boot.oat", "/foo", &name, &error)) << error;
+  EXPECT_EQ("/foo/system@framework@boot.oat", name);
 }
 
 TEST_F(UtilsTest, GetDalvikCache) {
-  EXPECT_STREQ("", GetDalvikCache("should-not-exist123", false).c_str());
+  EXPECT_STREQ("", GetDalvikCache("should-not-exist123").c_str());
 
-  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".", false).c_str());
-  EXPECT_STREQ((android_data_ + "/dalvik-cache/should-not-be-there").c_str(),
-               GetDalvikCache("should-not-be-there", true).c_str());
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".").c_str());
 }
 
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index b2be770..40f12e9 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -20,6 +20,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
@@ -1156,9 +1157,6 @@
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
-    case Instruction::kVerifyRegCString:
-      result = result && CheckStringIndex(inst->VRegC());
-      break;
   }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
@@ -1257,6 +1255,10 @@
   if (descriptor[0] != 'L') {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "can't call new-instance on type '" << descriptor << "'";
     return false;
+  } else if (strcmp(descriptor, "Ljava/lang/Class;") == 0) {
+    // An unlikely new instance on Class is not allowed. Fall back to interpreter to ensure an
+    // exception is thrown when this statement is executed (compiled code would not do that).
+    Fail(VERIFY_ERROR_INSTANTIATION);
   }
   return true;
 }
@@ -2839,7 +2841,7 @@
       ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* return_type_class = called_method->GetReturnType(can_load_classes_,
                                                                         pointer_size);
         if (return_type_class != nullptr) {
@@ -2882,7 +2884,7 @@
       } else {
         is_constructor = called_method->IsConstructor();
         return_type_descriptor = called_method->GetReturnTypeDescriptor();
-        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* return_type_class = called_method->GetReturnType(can_load_classes_,
                                                                         pointer_size);
         if (return_type_class != nullptr) {
@@ -3326,69 +3328,15 @@
       }
       break;
     }
-    case Instruction::INVOKE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
-      break;
-    }
-    case Instruction::CAPTURE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement capture-variable verification
-      break;
-    }
-    case Instruction::CREATE_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
-      break;
-    }
-    case Instruction::LIBERATE_VARIABLE: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement liberate-variable verification
-      break;
-    }
-
-    case Instruction::UNUSED_F4: {
-      DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
-      // Conservatively fail verification on release builds.
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
-      break;
-    }
-
-    case Instruction::BOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement box-lambda verification
-
-      // Partial verification. Sets the resulting type to always be an object, which
-      // is good enough for some other verification to occur without hard-failing.
-      const uint32_t vreg_target_object = inst->VRegA_22x();  // box-lambda vA, vB
-      const RegType& reg_type = reg_types_.JavaLangObject(need_precise_constants_);
-      work_line_->SetRegisterType<LockOp::kClear>(this, vreg_target_object, reg_type);
-      break;
-    }
-
-     case Instruction::UNBOX_LAMBDA: {
-      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
-      // If the code would've normally hard-failed, then the interpreter will throw the
-      // appropriate verification errors at runtime.
-      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement unbox-lambda verification
-      break;
-    }
 
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-    case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
+    case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
+    case Instruction::UNUSED_FC ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
+    case Instruction::INVOKE_POLYMORPHIC:
+    case Instruction::INVOKE_POLYMORPHIC_RANGE:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
       break;
 
@@ -4528,7 +4476,7 @@
 
 ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
   const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
-  // Check access to class
+  // Check access to class.
   const RegType& klass_type = ResolveClassAndCheckAccess(field_id.class_idx_);
   if (klass_type.IsConflict()) {
     AppendToLastFailMessage(StringPrintf(" in attempt to access instance field %d (%s) in %s",
@@ -4549,28 +4497,19 @@
     DCHECK(self_->IsExceptionPending());
     self_->ClearException();
     return nullptr;
-  } else if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
-                                                  field->GetAccessFlags())) {
-    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
-                                    << " from " << GetDeclaringClass();
-    return nullptr;
-  } else if (field->IsStatic()) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
-                                    << " to not be static";
-    return nullptr;
   } else if (obj_type.IsZero()) {
-    // Cannot infer and check type, however, access will cause null pointer exception
-    return field;
+    // Cannot infer and check type; however, access will cause a null pointer exception.
+    // Fall through into a few last soft failure checks below.
   } else if (!obj_type.IsReferenceTypes()) {
-    // Trying to read a field from something that isn't a reference
+    // Trying to read a field from something that isn't a reference.
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance field access on object that has "
                                       << "non-reference type " << obj_type;
     return nullptr;
   } else {
+    std::string temp;
     mirror::Class* klass = field->GetDeclaringClass();
     const RegType& field_klass =
-        FromClass(dex_file_->GetFieldDeclaringClassDescriptor(field_id),
-                  klass, klass->CannotBeAssignedFromOtherTypes());
+        FromClass(klass->GetDescriptor(&temp), klass, klass->CannotBeAssignedFromOtherTypes());
     if (obj_type.IsUninitializedTypes()) {
       // Field accesses through uninitialized references are only allowable for constructors where
       // the field is declared in this class.
@@ -4584,7 +4523,6 @@
                                           << " of " << PrettyMethod(dex_method_idx_, *dex_file_);
         return nullptr;
       }
-      return field;
     } else if (!field_klass.IsAssignableFrom(obj_type)) {
       // Trying to access C1.field1 using reference of type C2, which is neither C1 or a sub-class
       // of C1. For resolution to occur the declared class of the field must be compatible with
@@ -4602,10 +4540,22 @@
       Fail(type) << "cannot access instance field " << PrettyField(field)
                  << " from object of type " << obj_type;
       return nullptr;
-    } else {
-      return field;
     }
   }
+
+  // A few last soft failure checks.
+  if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
+                                           field->GetAccessFlags())) {
+    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
+                                    << " from " << GetDeclaringClass();
+    return nullptr;
+  } else if (field->IsStatic()) {
+    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
+                                    << " to not be static";
+    return nullptr;
+  }
+
+  return field;
 }
 
 template <MethodVerifier::FieldAccessType kAccType>
@@ -4928,6 +4878,10 @@
       // Initialize them as conflicts so they don't add to GC and deoptimization information.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn);
       AdjustReturnLine(this, ret_inst, target_line);
+      // Directly bail if a hard failure was found.
+      if (have_pending_hard_failure_) {
+        return false;
+      }
     }
   } else {
     RegisterLineArenaUniquePtr copy;
@@ -4963,7 +4917,7 @@
 const RegType& MethodVerifier::GetMethodReturnType() {
   if (return_type_ == nullptr) {
     if (mirror_method_ != nullptr) {
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       mirror::Class* return_type_class = mirror_method_->GetReturnType(can_load_classes_,
                                                                        pointer_size);
       if (return_type_class != nullptr) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 2592a21..5fe95c2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -86,13 +86,10 @@
   VERIFY_ERROR_ACCESS_METHOD = 128,       // IllegalAccessError.
   VERIFY_ERROR_CLASS_CHANGE = 256,        // IncompatibleClassChangeError.
   VERIFY_ERROR_INSTANTIATION = 512,       // InstantiationError.
-  // For opcodes that don't have complete verifier support (such as lambda opcodes),
-  // we need a way to continue execution at runtime without attempting to re-verify
-  // (since we know it will fail no matter what). Instead, run as the interpreter
-  // in a special "do access checks" mode which will perform verifier-like checking
-  // on the fly.
-  //
-  // TODO: Once all new opcodes have implemented full verifier support, this can be removed.
+  // For opcodes that don't have complete verifier support, we need a way to continue
+  // execution at runtime without attempting to re-verify (since we know it will fail no
+  // matter what). Instead, run as the interpreter in a special "do access checks" mode
+  // which will perform verifier-like checking on the fly.
   VERIFY_ERROR_FORCE_INTERPRETER = 1024,  // Skip the verification phase at runtime;
                                           // force the interpreter to do access checks.
                                           // (sets a soft fail at compile time).
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 308c2aa..85daba9 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -773,6 +773,8 @@
   }
   if (!klass_.IsNull()) {
     CHECK(!descriptor_.empty()) << *this;
+    std::string temp;
+    CHECK_EQ(descriptor_.ToString(), klass_.Read()->GetDescriptor(&temp)) << *this;
   }
 }
 
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 355d552..2c99275 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,6 +30,7 @@
 namespace art {
 
 jclass WellKnownClasses::com_android_dex_Dex;
+jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
 jclass WellKnownClasses::dalvik_system_DexFile;
 jclass WellKnownClasses::dalvik_system_DexPathList;
 jclass WellKnownClasses::dalvik_system_DexPathList__Element;
@@ -57,7 +58,6 @@
 jclass WellKnownClasses::java_lang_StringFactory;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
-jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler;
 jclass WellKnownClasses::java_lang_ThreadGroup;
 jclass WellKnownClasses::java_lang_Throwable;
 jclass WellKnownClasses::java_nio_DirectByteBuffer;
@@ -121,9 +121,9 @@
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints;
 jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder;
 jmethodID WellKnownClasses::java_lang_System_runFinalization = nullptr;
+jmethodID WellKnownClasses::java_lang_Thread_dispatchUncaughtException;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
-jmethodID WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
 jmethodID WellKnownClasses::java_lang_ThreadGroup_removeThread;
 jmethodID WellKnownClasses::java_nio_DirectByteBuffer_init;
 jmethodID WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation;
@@ -141,7 +141,6 @@
 jfieldID WellKnownClasses::java_lang_Thread_lock;
 jfieldID WellKnownClasses::java_lang_Thread_name;
 jfieldID WellKnownClasses::java_lang_Thread_priority;
-jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_ngroups;
@@ -182,7 +181,7 @@
   if (fid == nullptr) {
     ScopedObjectAccess soa(env);
     if (soa.Self()->IsExceptionPending()) {
-      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump() << '\n';
+      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump();
     }
     std::ostringstream os;
     WellKnownClasses::ToClass(c)->DumpClass(os, mirror::Class::kDumpClassFullDetail);
@@ -199,7 +198,7 @@
   if (mid == nullptr) {
     ScopedObjectAccess soa(env);
     if (soa.Self()->IsExceptionPending()) {
-      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump() << '\n';
+      LOG(INTERNAL_FATAL) << soa.Self()->GetException()->Dump();
     }
     std::ostringstream os;
     WellKnownClasses::ToClass(c)->DumpClass(os, mirror::Class::kDumpClassFullDetail);
@@ -217,6 +216,7 @@
 
 void WellKnownClasses::Init(JNIEnv* env) {
   com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
+  dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
   dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
   dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
   dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element");
@@ -245,8 +245,6 @@
   java_lang_StringFactory = CacheClass(env, "java/lang/StringFactory");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
-  java_lang_Thread__UncaughtExceptionHandler = CacheClass(env,
-      "java/lang/Thread$UncaughtExceptionHandler");
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
   java_lang_Throwable = CacheClass(env, "java/lang/Throwable");
   java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer");
@@ -273,9 +271,9 @@
   java_lang_ref_ReferenceQueue_add = CacheMethod(env, java_lang_ref_ReferenceQueue.get(), true, "add", "(Ljava/lang/ref/Reference;)V");
 
   java_lang_reflect_Proxy_invoke = CacheMethod(env, java_lang_reflect_Proxy, true, "invoke", "(Ljava/lang/reflect/Proxy;Ljava/lang/reflect/Method;[Ljava/lang/Object;)Ljava/lang/Object;");
+  java_lang_Thread_dispatchUncaughtException = CacheMethod(env, java_lang_Thread, false, "dispatchUncaughtException", "(Ljava/lang/Throwable;)V");
   java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
   java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V");
-  java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
   java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "threadTerminated", "(Ljava/lang/Thread;)V");
   java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V");
   libcore_reflect_AnnotationFactory_createAnnotation = CacheMethod(env, libcore_reflect_AnnotationFactory, true, "createAnnotation", "(Ljava/lang/Class;[Llibcore/reflect/AnnotationMember;)Ljava/lang/annotation/Annotation;");
@@ -349,7 +347,6 @@
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
-  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtExceptionHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
   java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "[Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_ngroups = CacheField(env, java_lang_ThreadGroup, false, "ngroups", "I");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index cc60b4d..b8e05b8 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -41,6 +41,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static jclass com_android_dex_Dex;
+  static jclass dalvik_annotation_optimization_FastNative;
   static jclass dalvik_system_DexFile;
   static jclass dalvik_system_DexPathList;
   static jclass dalvik_system_DexPathList__Element;
@@ -69,7 +70,6 @@
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
-  static jclass java_lang_Thread__UncaughtExceptionHandler;
   static jclass java_lang_Throwable;
   static jclass java_util_ArrayList;
   static jclass java_util_Collections;
@@ -132,9 +132,9 @@
   static jmethodID java_lang_StringFactory_newStringFromCodePoints;
   static jmethodID java_lang_StringFactory_newStringFromStringBuilder;
   static jmethodID java_lang_System_runFinalization;
+  static jmethodID java_lang_Thread_dispatchUncaughtException;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
-  static jmethodID java_lang_Thread__UncaughtExceptionHandler_uncaughtException;
   static jmethodID java_lang_ThreadGroup_removeThread;
   static jmethodID java_nio_DirectByteBuffer_init;
   static jmethodID libcore_reflect_AnnotationFactory_createAnnotation;
@@ -154,7 +154,6 @@
   static jfieldID java_lang_Thread_lock;
   static jfieldID java_lang_Thread_name;
   static jfieldID java_lang_Thread_priority;
-  static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
   static jfieldID java_lang_ThreadGroup_groups;
   static jfieldID java_lang_ThreadGroup_ngroups;
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index b9e37a1..e1120e4 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -24,7 +24,7 @@
 LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
 LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
 LOCAL_SRC_FILES := sigchain_dummy.cc
-LOCAL_CLANG = $(ART_TARGET_CLANG)
+LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_MODULE:= libsigchain
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
@@ -39,7 +39,7 @@
 LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
 LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS)
 LOCAL_SRC_FILES := sigchain.cc
-LOCAL_CLANG = $(ART_TARGET_CLANG)
+LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_MODULE:= libsigchain
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
@@ -54,11 +54,11 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain_dummy.cc
 LOCAL_MODULE:= libsigchain
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
 include $(BUILD_HOST_SHARED_LIBRARY)
@@ -69,11 +69,11 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain.cc
 LOCAL_MODULE:= libsigchain
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
 
@@ -85,10 +85,10 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
-LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_CLANG := $(ART_HOST_CLANG)
 LOCAL_SRC_FILES := sigchain_dummy.cc
 LOCAL_MODULE:= libsigchain_dummy
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_LDLIBS = -ldl
+LOCAL_LDLIBS := -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index b76555b..c1efecd 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -103,7 +103,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
@@ -233,7 +233,7 @@
   return linked_sigaction(signal, new_action, old_action);
 }
 
-extern "C" sighandler_t signal(int signal, sighandler_t handler) {
+static sighandler_t signal_impl(int signal, sighandler_t handler) {
   struct sigaction sa;
   sigemptyset(&sa.sa_mask);
   sa.sa_handler = handler;
@@ -272,6 +272,16 @@
   return reinterpret_cast<sighandler_t>(sa.sa_handler);
 }
 
+extern "C" sighandler_t signal(int signal, sighandler_t handler) {
+  return signal_impl(signal, handler);
+}
+
+#if !defined(__LP64__)
+extern "C" sighandler_t bsd_signal(int signal, sighandler_t handler) {
+  return signal_impl(signal, handler);
+}
+#endif
+
 extern "C" int sigprocmask(int how, const sigset_t* bionic_new_set, sigset_t* bionic_old_set) {
   const sigset_t* new_set_ptr = bionic_new_set;
   sigset_t tmpset;
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index dfe0c6f..aa3c360 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -38,7 +38,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
diff --git a/sigchainlib/version-script32.txt b/sigchainlib/version-script32.txt
new file mode 100644
index 0000000..eec9103
--- /dev/null
+++ b/sigchainlib/version-script32.txt
@@ -0,0 +1,15 @@
+{
+global:
+  ClaimSignalChain;
+  UnclaimSignalChain;
+  InvokeUserSignalHandler;
+  InitializeSignalChain;
+  EnsureFrontOfChain;
+  SetSpecialSignalHandlerFn;
+  bsd_signal;
+  sigaction;
+  signal;
+  sigprocmask;
+local:
+  *;
+};
diff --git a/sigchainlib/version-script.txt b/sigchainlib/version-script64.txt
similarity index 100%
rename from sigchainlib/version-script.txt
rename to sigchainlib/version-script64.txt
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index 8619ff7..bb18a70 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -27,11 +27,18 @@
 
 static JavaVM* jvm = nullptr;
 
+static jint Java_Main_intFastNativeMethod(JNIEnv*, jclass, jint a, jint b, jint c);
+
+static JNINativeMethod sMainMethods[] = {
+  {"intFastNativeMethod", "(III)I", reinterpret_cast<void*>(Java_Main_intFastNativeMethod) }
+};
+
 extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
   CHECK(vm != nullptr);
   CHECK(jvm == nullptr);
   jvm = vm;
   std::cout << "JNI_OnLoad called" << std::endl;
+
   return JNI_VERSION_1_6;
 }
 
@@ -740,5 +747,24 @@
   InvokeSpecificMethod(e, l, "sayHi");
 }
 
+// Register on demand because many tests share this JNI library and
+// we can't unconditionally register these natives.
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_registerNativesJniTest(JNIEnv* e, jclass kls) {
+  const size_t numMethods = sizeof(sMainMethods)/sizeof(JNINativeMethod);
+
+  if (e->RegisterNatives(kls, sMainMethods, numMethods) < 0) {
+    std::cerr << "RegisterNatives failed for 'Main'" << std::endl;
+    return JNI_FALSE;
+  }
+
+  return JNI_TRUE;
+}
+
+// Annotated with @FastNative in Java code. Doesn't need the "!" prefix when registered.
+// NOTE: Still has to be registered explicitly to avoid mutator lock check failures.
+static jint Java_Main_intFastNativeMethod(JNIEnv*, jclass, jint a, jint b, jint c) {
+  return a + b + c;
+}
+
 }  // namespace art
 
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index e0530d8..573afdb 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -18,6 +18,8 @@
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 
+import dalvik.annotation.optimization.FastNative;
+
 public class Main {
     public static void main(String[] args) {
         System.loadLibrary(args[0]);
@@ -44,8 +46,13 @@
         testInvokeLambdaMethod(() -> { System.out.println("hi-lambda: " + lambda); });
         String def = "δ";
         testInvokeLambdaDefaultMethod(() -> { System.out.println("hi-default " + def + lambda); });
+
+        registerNativesJniTest();
+        testFastNativeMethods();
     }
 
+    private static native boolean registerNativesJniTest();
+
     private static native void testCallDefaultMethods();
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -220,7 +227,7 @@
         InvocationHandler handler = new DummyInvocationHandler();
         SimpleInterface proxy =
                 (SimpleInterface) Proxy.newProxyInstance(SimpleInterface.class.getClassLoader(),
-                        new Class[] {SimpleInterface.class}, handler);
+                        new Class<?>[] {SimpleInterface.class}, handler);
         if (testGetMethodID(SimpleInterface.class) == 0) {
             throw new AssertionError();
         }
@@ -263,6 +270,25 @@
     private static native void testInvokeLambdaMethod(LambdaInterface iface);
 
     private static native void testInvokeLambdaDefaultMethod(LambdaInterface iface);
+
+    // Test invoking @FastNative methods works correctly.
+
+    // Return sum of a+b+c.
+    @FastNative
+    static native int intFastNativeMethod(int a, int b, int c);
+
+    private static void testFastNativeMethods() {
+        int[] returns = { 0, 3, 6, 9, 12 };
+        for (int i = 0; i < returns.length; i++) {
+            int result = intFastNativeMethod(i, i, i);
+            if (returns[i] != result) {
+                System.out.println("FastNative Int Run " + i + " with " + returns[i] + " vs " + result);
+                throw new AssertionError();
+            }
+        }
+    }
+
+
 }
 
 @FunctionalInterface
diff --git a/test/004-ThreadStress/check b/test/004-ThreadStress/check
index ffbb8cf..77e4cdb 100755
--- a/test/004-ThreadStress/check
+++ b/test/004-ThreadStress/check
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Only compare the last line.
-tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
+# Do not compare numbers, so replace numbers with 'N'.
+sed '-es/[0-9][0-9]*/N/g' "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
index a26fb4f..772faf6 100644
--- a/test/004-ThreadStress/expected.txt
+++ b/test/004-ThreadStress/expected.txt
@@ -1 +1,11 @@
+JNI_OnLoad called
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
 Finishing worker
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index b9a46de..5cae398 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -93,9 +93,7 @@
 
                 killTemp = osClass.getDeclaredMethod("kill", int.class, int.class);
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
-                }
+                Main.printThrowable(e);
             }
 
             pid = pidTemp;
@@ -107,9 +105,10 @@
         public boolean perform() {
             try {
                 kill.invoke(null, pid, sigquit);
+            } catch (OutOfMemoryError e) {
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
+                if (!e.getClass().getName().equals(Main.errnoExceptionName)) {
+                    Main.printThrowable(e);
                 }
             }
             return true;
@@ -154,7 +153,10 @@
     private final static class StackTrace extends Operation {
         @Override
         public boolean perform() {
-            Thread.currentThread().getStackTrace();
+            try {
+                Thread.currentThread().getStackTrace();
+            } catch (OutOfMemoryError e) {
+            }
             return true;
         }
     }
@@ -264,6 +266,7 @@
     }
 
     public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
         parseAndRun(args);
     }
 
@@ -395,12 +398,21 @@
             System.out.println(frequencyMap);
         }
 
-        runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        try {
+            runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        } catch (Throwable t) {
+            // In this case, the output should not contain all the required
+            // "Finishing worker" lines.
+            Main.printThrowable(t);
+        }
     }
 
     public static void runTest(final int numberOfThreads, final int numberOfDaemons,
                                final int operationsPerThread, final Object lock,
                                Map<Operation, Double> frequencyMap) throws Exception {
+        final Thread mainThread = Thread.currentThread();
+        final Barrier startBarrier = new Barrier(numberOfThreads + numberOfDaemons + 1);
+
         // Each normal thread is going to do operationsPerThread
         // operations. Each daemon thread will loop over all
         // the operations and will not stop.
@@ -434,8 +446,9 @@
             }
             // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = t < numberOfThreads ? new Main(lock, t, operations) :
-                                                      new Daemon(lock, t, operations);
+            threadStresses[t] = (t < numberOfThreads)
+                    ? new Main(lock, t, operations)
+                    : new Daemon(lock, t, operations, mainThread, startBarrier);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -470,32 +483,41 @@
             runners[r] = new Thread("Runner thread " + r) {
                 final Main threadStress = ts;
                 public void run() {
-                    int id = threadStress.id;
-                    System.out.println("Starting worker for " + id);
-                    while (threadStress.nextOperation < operationsPerThread) {
-                        try {
-                            Thread thread = new Thread(ts, "Worker thread " + id);
-                            thread.start();
+                    try {
+                        int id = threadStress.id;
+                        // No memory-hungry tasks are running yet, so println() should succeed.
+                        System.out.println("Starting worker for " + id);
+                        // Wait until all runners and daemons reach the starting point.
+                        startBarrier.await();
+                        // Run the stress tasks.
+                        while (threadStress.nextOperation < operationsPerThread) {
                             try {
+                                Thread thread = new Thread(ts, "Worker thread " + id);
+                                thread.start();
                                 thread.join();
-                            } catch (InterruptedException e) {
-                            }
 
-                            System.out.println("Thread exited for " + id + " with "
-                                               + (operationsPerThread - threadStress.nextOperation)
-                                               + " operations remaining.");
-                        } catch (OutOfMemoryError e) {
-                            // Ignore OOME since we need to print "Finishing worker" for the test
-                            // to pass.
+                                if (DEBUG) {
+                                    System.out.println(
+                                        "Thread exited for " + id + " with " +
+                                        (operationsPerThread - threadStress.nextOperation) +
+                                        " operations remaining.");
+                                }
+                            } catch (OutOfMemoryError e) {
+                                // Ignore OOME since we need to print "Finishing worker"
+                                // for the test to pass. This OOM can come from creating
+                                // the Thread or from the DEBUG output.
+                                // Note that the Thread creation may fail repeatedly,
+                                // preventing the runner from making any progress,
+                                // especially if the number of daemons is too high.
+                            }
                         }
-                    }
-                    // Keep trying to print "Finishing worker" until it succeeds.
-                    while (true) {
-                        try {
-                            System.out.println("Finishing worker");
-                            break;
-                        } catch (OutOfMemoryError e) {
-                        }
+                        // Print "Finishing worker" through JNI to avoid OOME.
+                        Main.printString(Main.finishingWorkerMessage);
+                    } catch (Throwable t) {
+                        Main.printThrowable(t);
+                        // Interrupt the main thread so that it can shut down in an orderly
+                        // fashion instead of waiting indefinitely for some Barrier.
+                        mainThread.interrupt();
                     }
                 }
             };
@@ -528,6 +550,9 @@
         for (int r = 0; r < runners.length; r++) {
             runners[r].start();
         }
+        // Wait for all threads to reach the starting point.
+        startBarrier.await();
+        // Wait for runners to finish.
         for (int r = 0; r < runners.length; r++) {
             runners[r].join();
         }
@@ -570,8 +595,14 @@
     }
 
     private static class Daemon extends Main {
-        private Daemon(Object lock, int id, Operation[] operations) {
+        private Daemon(Object lock,
+                       int id,
+                       Operation[] operations,
+                       Thread mainThread,
+                       Barrier startBarrier) {
             super(lock, id, operations);
+            this.mainThread = mainThread;
+            this.startBarrier = startBarrier;
         }
 
         public void run() {
@@ -579,26 +610,74 @@
                 if (DEBUG) {
                     System.out.println("Starting ThreadStress Daemon " + id);
                 }
-                int i = 0;
-                while (true) {
-                    Operation operation = operations[i];
-                    if (DEBUG) {
-                        System.out.println("ThreadStress Daemon " + id
-                                           + " operation " + i
-                                           + " is " + operation);
+                startBarrier.await();
+                try {
+                    int i = 0;
+                    while (true) {
+                        Operation operation = operations[i];
+                        if (DEBUG) {
+                            System.out.println("ThreadStress Daemon " + id
+                                               + " operation " + i
+                                               + " is " + operation);
+                        }
+                        operation.perform();
+                        i = (i + 1) % operations.length;
                     }
-                    operation.perform();
-                    i = (i + 1) % operations.length;
+                } catch (OutOfMemoryError e) {
+                    // Catch OutOfMemoryErrors since these can cause the test to fail if they print
+                    // the stack trace after "Finishing worker". Note that operations should catch
+                    // their own OOME; this guards only against OOME in the DEBUG output.
                 }
-            } catch (OutOfMemoryError e) {
-                // Catch OutOfMemoryErrors since these can cause the test to fail it they print
-                // the stack trace after "Finishing worker".
-            } finally {
                 if (DEBUG) {
                     System.out.println("Finishing ThreadStress Daemon for " + id);
                 }
+            } catch (Throwable t) {
+                Main.printThrowable(t);
+                // Interrupt the main thread so that it can shut down in an orderly
+                // fashion instead of waiting indefinitely for some Barrier.
+                mainThread.interrupt();
             }
         }
+
+        final Thread mainThread;
+        final Barrier startBarrier;
     }
 
+    // Note: java.util.concurrent.CyclicBarrier.await() allocates memory and may throw OOM.
+    // That is highly undesirable in this test, so we use our own simple barrier class.
+    // The only memory allocation that can happen here is the lock inflation, which uses
+    // a native allocation. As such, it should succeed even if the Java heap is full.
+    // If the native allocation surprisingly fails, the program shall abort().
+    private static class Barrier {
+        public Barrier(int initialCount) {
+            count = initialCount;
+        }
+
+        public synchronized void await() throws InterruptedException {
+            --count;
+            if (count != 0) {
+                do {
+                    wait();
+                } while (count != 0);  // Check for spurious wakeup.
+            } else {
+                notifyAll();
+            }
+        }
+
+        private int count;
+    }
+
+    // Printing a String/Throwable through JNI requires only native memory and space
+    // in the local reference table, so it should succeed even if the Java heap is full.
+    private static native void printString(String s);
+    private static native void printThrowable(Throwable t);
+
+    static final String finishingWorkerMessage;
+    static final String errnoExceptionName;
+    static {
+        // We pre-allocate the strings in the class initializer to avoid const-string
+        // instructions in code that uses these strings later, as those may throw OOME.
+        finishingWorkerMessage = "Finishing worker\n";
+        errnoExceptionName = "ErrnoException";
+    }
 }
diff --git a/test/004-ThreadStress/thread_stress.cc b/test/004-ThreadStress/thread_stress.cc
new file mode 100644
index 0000000..573c352
--- /dev/null
+++ b/test/004-ThreadStress/thread_stress.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "jni.h"
+#include "mirror/string.h"
+#include "mirror/throwable.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printString(JNIEnv*, jclass, jstring s) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::String*>(s)->ToModifiedUtf8();
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printThrowable(JNIEnv*, jclass, jthrowable t) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::Throwable*>(t)->Dump();
+}
+
+}  // namespace art
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index b2f905e..d43d374 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -39,16 +39,24 @@
     }
   }
 
-  private static Unsafe getUnsafe() throws Exception {
+  private static Unsafe getUnsafe() throws NoSuchFieldException, IllegalAccessException {
     Class<?> unsafeClass = Unsafe.class;
     Field f = unsafeClass.getDeclaredField("theUnsafe");
     f.setAccessible(true);
     return (Unsafe) f.get(null);
   }
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException {
     System.loadLibrary(args[0]);
     Unsafe unsafe = getUnsafe();
+
+    testArrayBaseOffset(unsafe);
+    testArrayIndexScale(unsafe);
+    testGetAndPutAndCAS(unsafe);
+    testGetAndPutVolatile(unsafe);
+  }
+
+  private static void testArrayBaseOffset(Unsafe unsafe) {
     check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
         "Unsafe.arrayBaseOffset(boolean[])");
     check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
@@ -65,7 +73,9 @@
         "Unsafe.arrayBaseOffset(long[])");
     check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
         "Unsafe.arrayBaseOffset(Object[])");
+  }
 
+  private static void testArrayIndexScale(Unsafe unsafe) {
     check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
         "Unsafe.arrayIndexScale(boolean[])");
     check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
@@ -82,7 +92,9 @@
         "Unsafe.arrayIndexScale(long[])");
     check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
         "Unsafe.arrayIndexScale(Object[])");
+  }
 
+  private static void testGetAndPutAndCAS(Unsafe unsafe) throws NoSuchFieldException {
     TestClass t = new TestClass();
 
     int intValue = 12345678;
@@ -185,12 +197,58 @@
     }
   }
 
+  private static void testGetAndPutVolatile(Unsafe unsafe) throws NoSuchFieldException {
+    TestVolatileClass tv = new TestVolatileClass();
+
+    int intValue = 12345678;
+    Field volatileIntField = TestVolatileClass.class.getDeclaredField("volatileIntVar");
+    long volatileIntOffset = unsafe.objectFieldOffset(volatileIntField);
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          0,
+          "Unsafe.getIntVolatile(Object, long) - initial");
+    unsafe.putIntVolatile(tv, volatileIntOffset, intValue);
+    check(tv.volatileIntVar, intValue, "Unsafe.putIntVolatile(Object, long, int)");
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          intValue,
+          "Unsafe.getIntVolatile(Object, long)");
+
+    long longValue = 1234567887654321L;
+    Field volatileLongField = TestVolatileClass.class.getDeclaredField("volatileLongVar");
+    long volatileLongOffset = unsafe.objectFieldOffset(volatileLongField);
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          0,
+          "Unsafe.getLongVolatile(Object, long) - initial");
+    unsafe.putLongVolatile(tv, volatileLongOffset, longValue);
+    check(tv.volatileLongVar, longValue, "Unsafe.putLongVolatile(Object, long, long)");
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          longValue,
+          "Unsafe.getLongVolatile(Object, long)");
+
+    Object objectValue = new Object();
+    Field volatileObjectField = TestVolatileClass.class.getDeclaredField("volatileObjectVar");
+    long volatileObjectOffset = unsafe.objectFieldOffset(volatileObjectField);
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          null,
+          "Unsafe.getObjectVolatile(Object, long) - initial");
+    unsafe.putObjectVolatile(tv, volatileObjectOffset, objectValue);
+    check(tv.volatileObjectVar, objectValue, "Unsafe.putObjectVolatile(Object, long, Object)");
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          objectValue,
+          "Unsafe.getObjectVolatile(Object, long)");
+  }
+
   private static class TestClass {
     public int intVar = 0;
     public long longVar = 0;
     public Object objectVar = null;
   }
 
-  private static native int vmArrayBaseOffset(Class clazz);
-  private static native int vmArrayIndexScale(Class clazz);
+  private static class TestVolatileClass {
+    public volatile int volatileIntVar = 0;
+    public volatile long volatileLongVar = 0;
+    public volatile Object volatileObjectVar = null;
+  }
+
+  private static native int vmArrayBaseOffset(Class<?> clazz);
+  private static native int vmArrayIndexScale(Class<?> clazz);
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
index 3088866..aa7808f 100644
--- a/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
+++ b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
@@ -10,5 +10,5 @@
     boolean callMe() default false;
     boolean biteMe();
     AnnoFancyMethodEnum enumerated() default AnnoFancyMethodEnum.FOO;
-    Class someClass() default SomeClass.class;
+    Class<?> someClass() default SomeClass.class;
 }
diff --git a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
index c32e9a2..7933b80 100644
--- a/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
+++ b/test/005-annotations/src/android/test/anno/AnnoMissingClass.java
@@ -20,5 +20,5 @@
 
 @Retention(RetentionPolicy.RUNTIME)
 public @interface AnnoMissingClass {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 51254b4..8ea8e8e 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -42,7 +42,7 @@
         }
     }
 
-    static void printAnnotations(Class clazz) {
+    static void printAnnotations(Class<?> clazz) {
         Annotation[] annos;
         Annotation[][] parAnnos;
 
@@ -52,7 +52,7 @@
         printAnnotationArray("", annos);
         System.out.println();
 
-        for (Constructor c: clazz.getDeclaredConstructors()) {
+        for (Constructor<?> c: clazz.getDeclaredConstructors()) {
             annos = c.getDeclaredAnnotations();
             System.out.println("  annotations on CTOR " + c + ":");
             printAnnotationArray("  ", annos);
@@ -139,8 +139,7 @@
         final IntToString[] mapping;
 
         try {
-            meth = TestAnnotations.class.getMethod("getFocusType",
-                    (Class[])null);
+            meth = TestAnnotations.class.getMethod("getFocusType");
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -255,7 +254,7 @@
     }
 
     private static class VMRuntime {
-        private static Class vmRuntimeClass;
+        private static Class<?> vmRuntimeClass;
         private static Method getRuntimeMethod;
         private static Method getTargetSdkVersionMethod;
         private static Method setTargetSdkVersionMethod;
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0226614..d1ea0b1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -85,7 +85,7 @@
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
 
-        Class Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
+        Class<?> Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
         Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
         String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
         System.out.println(result);
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index 38bd525..346e13d 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -118,14 +118,13 @@
         printClassAttrs(FancyClass.class);
 
         try {
-            Constructor cons;
-            cons = MemberClass.class.getConstructor(
-                    new Class[] { MemberClass.class });
+            Constructor<?> cons;
+            cons = MemberClass.class.getConstructor(MemberClass.class);
             System.out.println("constructor signature: "
                     + getSignatureAttribute(cons));
 
             Method meth;
-            meth = MemberClass.class.getMethod("foo", (Class[]) null);
+            meth = MemberClass.class.getMethod("foo");
             System.out.println("method signature: "
                     + getSignatureAttribute(meth));
 
@@ -222,7 +221,7 @@
     public static String getSignatureAttribute(Object obj) {
         Method method;
         try {
-            Class c = obj.getClass();
+            Class<?> c = obj.getClass();
             if (c == Method.class || c == Constructor.class) {
               c = AbstractMethod.class;
             }
@@ -263,9 +262,7 @@
     /*
      * Dump a variety of class attributes.
      */
-    public static void printClassAttrs(Class clazz) {
-        Class clazz2;
-
+    public static <T> void printClassAttrs(Class<T> clazz) {
         System.out.println("***** " + clazz + ":");
 
         System.out.println("  name: "
@@ -321,7 +318,7 @@
         System.out.println("  genericInterfaces: "
             + stringifyTypeArray(clazz.getGenericInterfaces()));
 
-        TypeVariable<Class<?>>[] typeParameters = clazz.getTypeParameters();
+        TypeVariable<Class<T>>[] typeParameters = clazz.getTypeParameters();
         System.out.println("  typeParameters: "
             + stringifyTypeArray(typeParameters));
     }
diff --git a/test/032-concrete-sub/src/ConcreteSub.java b/test/032-concrete-sub/src/ConcreteSub.java
index 083f25d..95adf63 100644
--- a/test/032-concrete-sub/src/ConcreteSub.java
+++ b/test/032-concrete-sub/src/ConcreteSub.java
@@ -37,13 +37,13 @@
         /*
          * Check reflection stuff.
          */
-        Class absClass = AbstractBase.class;
+        Class<?> absClass = AbstractBase.class;
         Method meth;
 
         System.out.println("class modifiers=" + absClass.getModifiers());
 
         try {
-            meth = absClass.getMethod("redefineMe", (Class[]) null);
+            meth = absClass.getMethod("redefineMe");
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
diff --git a/test/033-class-init-deadlock/expected.txt b/test/033-class-init-deadlock/expected.txt
index 182d0da..9e843a0 100644
--- a/test/033-class-init-deadlock/expected.txt
+++ b/test/033-class-init-deadlock/expected.txt
@@ -1,6 +1,4 @@
 Deadlock test starting.
-A initializing...
-B initializing...
 Deadlock test interrupting threads.
 Deadlock test main thread bailing.
 A initialized: false
diff --git a/test/033-class-init-deadlock/src/Main.java b/test/033-class-init-deadlock/src/Main.java
index 3233230..bd4d4ab 100644
--- a/test/033-class-init-deadlock/src/Main.java
+++ b/test/033-class-init-deadlock/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CyclicBarrier;
+
 /**
  * This causes most VMs to lock up.
  *
@@ -23,6 +25,8 @@
     public static boolean aInitialized = false;
     public static boolean bInitialized = false;
 
+    public static CyclicBarrier barrier = new CyclicBarrier(3);
+
     static public void main(String[] args) {
         Thread thread1, thread2;
 
@@ -30,10 +34,10 @@
         thread1 = new Thread() { public void run() { new A(); } };
         thread2 = new Thread() { public void run() { new B(); } };
         thread1.start();
-        // Give thread1 a chance to start before starting thread2.
-        try { Thread.sleep(1000); } catch (InterruptedException ie) { }
         thread2.start();
 
+        // Not expecting any exceptions, so print them out if we get them.
+        try { barrier.await(); } catch (Exception e) { System.out.println(e); }
         try { Thread.sleep(6000); } catch (InterruptedException ie) { }
 
         System.out.println("Deadlock test interrupting threads.");
@@ -48,8 +52,8 @@
 
 class A {
     static {
-        System.out.println("A initializing...");
-        try { Thread.sleep(3000); } catch (InterruptedException ie) { }
+        // Not expecting any exceptions, so print them out if we get them.
+        try { Main.barrier.await(); } catch (Exception e) { System.out.println(e); }
         new B();
         System.out.println("A initialized");
         Main.aInitialized = true;
@@ -58,8 +62,8 @@
 
 class B {
     static {
-        System.out.println("B initializing...");
-        try { Thread.sleep(3000); } catch (InterruptedException ie) { }
+        // Not expecting any exceptions, so print them out if we get them.
+        try { Main.barrier.await(); } catch (Exception e) { System.out.println(e); }
         new A();
         System.out.println("B initialized");
         Main.bInitialized = true;
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index 8cd6b2e..755d62e 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -33,7 +33,7 @@
     static void testClassNewInstance() {
         // should succeed
         try {
-            Class c = Class.forName("LocalClass");
+            Class<?> c = Class.forName("LocalClass");
             Object obj = c.newInstance();
             System.out.println("LocalClass succeeded");
         } catch (Exception ex) {
@@ -43,7 +43,7 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
             Object obj = c.newInstance();
             System.err.println("ERROR: PackageAccess succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
@@ -71,8 +71,8 @@
     static void testConstructorNewInstance() {
         // should fail -- getConstructor only returns public constructors
         try {
-            Class c = Class.forName("LocalClass");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("LocalClass");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("Cons LocalClass succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             System.out.println("Cons LocalClass failed as expected");
@@ -83,8 +83,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("LocalClass2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("LocalClass2");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons LocalClass2 succeeded");
         } catch (Exception ex) {
@@ -94,8 +94,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$InnerClass");
-            Constructor cons = c.getDeclaredConstructor(new Class<?>[]{Main.class});
+            Class<?> c = Class.forName("Main$InnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor(Main.class);
             Object obj = cons.newInstance(new Main());
             System.out.println("Cons InnerClass succeeded");
         } catch (Exception ex) {
@@ -105,8 +105,8 @@
 
         // should succeed
         try {
-            Class c = Class.forName("Main$StaticInnerClass");
-            Constructor cons = c.getDeclaredConstructor((Class[]) null);
+            Class<?> c = Class.forName("Main$StaticInnerClass");
+            Constructor<?> cons = c.getDeclaredConstructor();
             Object obj = cons.newInstance();
             System.out.println("Cons StaticInnerClass succeeded");
         } catch (Exception ex) {
@@ -116,8 +116,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("otherpackage.PackageAccess");
+            Constructor<?> cons = c.getConstructor();
             System.err.println("ERROR: Cons PackageAccess succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             // constructor isn't public
@@ -129,8 +129,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("MaybeAbstract");
-            Constructor cons = c.getConstructor(new Class[0] /*(Class[])null*/);
+            Class<?> c = Class.forName("MaybeAbstract");
+            Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationException ie) {
@@ -143,8 +143,8 @@
 
         // should fail
         try {
-            Class c = Class.forName("otherpackage.PackageAccess2");
-            Constructor cons = c.getConstructor((Class[]) null);
+            Class<?> c = Class.forName("otherpackage.PackageAccess2");
+            Constructor<?> cons = c.getConstructor();
             if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
             Object obj = cons.newInstance();
             System.err.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
@@ -197,7 +197,7 @@
 
         static Object newInstance() {
             try {
-                Class c = CC.class;
+                Class<?> c = CC.class;
                 return c.newInstance();
             } catch (Exception ex) {
                 ex.printStackTrace();
diff --git a/test/042-new-instance/src/otherpackage/ConstructorAccess.java b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
index a74e9a0..79d572c 100644
--- a/test/042-new-instance/src/otherpackage/ConstructorAccess.java
+++ b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
@@ -29,8 +29,8 @@
     // accessibility using the frame below (in Main class), we will see an
     // IllegalAccessException from #newInstance
     static public void newConstructorInstance() throws Exception {
-      Class c = Inner.class;
-      Constructor cons = c.getDeclaredConstructor((Class[]) null);
+      Class<?> c = Inner.class;
+      Constructor<?> cons = c.getDeclaredConstructor();
       Object obj = cons.newInstance();
     }
 }
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 445a6cc..5f04b93 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -99,18 +99,16 @@
         InvocationHandler handler = new MyInvocationHandler(proxyMe);
 
         /* create the proxy class */
-        Class proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
-                            new Class[] { Quads.class, Colors.class, Trace.class });
+        Class<?> proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
+                Quads.class, Colors.class, Trace.class);
         Main.registerProxyClassName(proxyClass.getCanonicalName());
 
         /* create a proxy object, passing the handler object in */
         Object proxy = null;
         try {
-            Constructor<Class> cons;
-            cons = proxyClass.getConstructor(
-                            new Class[] { InvocationHandler.class });
+            Constructor<?> cons = proxyClass.getConstructor(InvocationHandler.class);
             //System.out.println("Constructor is " + cons);
-            proxy = cons.newInstance(new Object[] { handler });
+            proxy = cons.newInstance(handler);
         } catch (NoSuchMethodException nsme) {
             System.err.println("failed: " + nsme);
         } catch (InstantiationException ie) {
diff --git a/test/044-proxy/src/Clash.java b/test/044-proxy/src/Clash.java
index adeffdc..d000112 100644
--- a/test/044-proxy/src/Clash.java
+++ b/test/044-proxy/src/Clash.java
@@ -30,7 +30,7 @@
         /* try passing in the same interface twice */
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1A.class },
+                new Class<?>[] { Interface1A.class, Interface1A.class },
                 handler);
             System.err.println("Dupe did not throw expected exception");
         } catch (IllegalArgumentException iae) {
@@ -39,7 +39,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface1A.class, Interface1B.class },
+                new Class<?>[] { Interface1A.class, Interface1B.class },
                 handler);
             System.err.println("Clash did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash2.java b/test/044-proxy/src/Clash2.java
index 2a384f4..e405cfe 100644
--- a/test/044-proxy/src/Clash2.java
+++ b/test/044-proxy/src/Clash2.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] { Interface2A.class, Interface2B.class },
+                new Class<?>[] { Interface2A.class, Interface2B.class },
                 handler);
             System.err.println("Clash2 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
diff --git a/test/044-proxy/src/Clash3.java b/test/044-proxy/src/Clash3.java
index 6d6f2f2..44806ce 100644
--- a/test/044-proxy/src/Clash3.java
+++ b/test/044-proxy/src/Clash3.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface3a.class,
                     Interface3base.class,
                     Interface3aa.class,
diff --git a/test/044-proxy/src/Clash4.java b/test/044-proxy/src/Clash4.java
index 1bfb37f..ca5c3ab 100644
--- a/test/044-proxy/src/Clash4.java
+++ b/test/044-proxy/src/Clash4.java
@@ -29,7 +29,7 @@
 
         try {
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
-                new Class[] {
+                new Class<?>[] {
                     Interface4a.class,
                     Interface4aa.class,
                     Interface4base.class,
diff --git a/test/044-proxy/src/FloatSelect.java b/test/044-proxy/src/FloatSelect.java
index febe697..217ccaf 100644
--- a/test/044-proxy/src/FloatSelect.java
+++ b/test/044-proxy/src/FloatSelect.java
@@ -34,7 +34,7 @@
     public static void main(String[] args) {
         FloatSelectI proxyObject = (FloatSelectI) Proxy.newProxyInstance(
             FloatSelectI.class.getClassLoader(),
-            new Class[] { FloatSelectI.class },
+            new Class<?>[] { FloatSelectI.class },
             new FloatSelectIInvoke1());
 
         float floatResult = proxyObject.method(2.1f, 5.8f);
diff --git a/test/044-proxy/src/NativeProxy.java b/test/044-proxy/src/NativeProxy.java
index b425da8..c609dc2 100644
--- a/test/044-proxy/src/NativeProxy.java
+++ b/test/044-proxy/src/NativeProxy.java
@@ -40,7 +40,7 @@
         try {
             NativeInterface inf = (NativeInterface)Proxy.newProxyInstance(
                     NativeProxy.class.getClassLoader(),
-                    new Class[] { NativeInterface.class },
+                    new Class<?>[] { NativeInterface.class },
                     new NativeInvocationHandler());
 
             nativeCall(inf);
diff --git a/test/044-proxy/src/ReturnsAndArgPassing.java b/test/044-proxy/src/ReturnsAndArgPassing.java
index 225cc5b..3d8ebf0 100644
--- a/test/044-proxy/src/ReturnsAndArgPassing.java
+++ b/test/044-proxy/src/ReturnsAndArgPassing.java
@@ -98,7 +98,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
     check(fooInvocations == 0);
     proxyMyInterface.voidFoo();
@@ -441,7 +441,7 @@
     MyInvocationHandler myHandler = new MyInvocationHandler();
     MyInterface proxyMyInterface =
         (MyInterface)Proxy.newProxyInstance(ReturnsAndArgPassing.class.getClassLoader(),
-                                            new Class[] { MyInterface.class },
+                                            new Class<?>[] { MyInterface.class },
                                             myHandler);
 
     check((Integer)proxyMyInterface.selectArg(0, Integer.MAX_VALUE, Long.MAX_VALUE,
diff --git a/test/044-proxy/src/WrappedThrow.java b/test/044-proxy/src/WrappedThrow.java
index 27ae84e..643ba05 100644
--- a/test/044-proxy/src/WrappedThrow.java
+++ b/test/044-proxy/src/WrappedThrow.java
@@ -32,7 +32,7 @@
 
         try {
             proxy = Proxy.newProxyInstance(WrappedThrow.class.getClassLoader(),
-                new Class[] { InterfaceW1.class, InterfaceW2.class },
+                new Class<?>[] { InterfaceW1.class, InterfaceW2.class },
                 handler);
         } catch (IllegalArgumentException iae) {
             System.out.println("WT init failed");
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 67a0d11..10dad8d 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -32,7 +32,7 @@
     public Main(ArrayList<Integer> stuff) {}
 
     void printMethodInfo(Method meth) {
-        Class[] params, exceptions;
+        Class<?>[] params, exceptions;
         int i;
 
         System.out.println("Method name is " + meth.getName());
@@ -62,7 +62,7 @@
     private void showStrings(Target instance)
         throws NoSuchFieldException, IllegalAccessException {
 
-        Class target = Target.class;
+        Class<?> target = Target.class;
         String one, two, three, four;
         Field field = null;
 
@@ -80,15 +80,15 @@
 
     public static void checkAccess() {
         try {
-            Class target = otherpackage.Other.class;
+            Class<?> target = otherpackage.Other.class;
             Object instance = new otherpackage.Other();
             Method meth;
 
-            meth = target.getMethod("publicMethod", (Class[]) null);
+            meth = target.getMethod("publicMethod");
             meth.invoke(instance);
 
             try {
-                meth = target.getMethod("packageMethod", (Class[]) null);
+                meth = target.getMethod("packageMethod");
                 System.err.println("succeeded on package-scope method");
             } catch (NoSuchMethodException nsme) {
                 // good
@@ -97,7 +97,7 @@
 
             instance = otherpackage.Other.getInnerClassInstance();
             target = instance.getClass();
-            meth = target.getMethod("innerMethod", (Class[]) null);
+            meth = target.getMethod("innerMethod");
             try {
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
                 meth.invoke(instance);
@@ -121,26 +121,25 @@
     }
 
     public void run() {
-        Class target = Target.class;
+        Class<Target> target = Target.class;
         Method meth = null;
         Field field = null;
         boolean excep;
 
         try {
-            meth = target.getMethod("myMethod", new Class[] { int.class });
+            meth = target.getMethod("myMethod", int.class);
 
             if (meth.getDeclaringClass() != target)
                 throw new RuntimeException();
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod", new Class[] { float.class });
+            meth = target.getMethod("myMethod", float.class);
             printMethodInfo(meth);
 
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             printMethodInfo(meth);
 
-            meth = target.getMethod("myMethod",
-                new Class[] { String[].class, float.class, char.class });
+            meth = target.getMethod("myMethod", String[].class, float.class, char.class);
             printMethodInfo(meth);
 
             Target instance = new Target();
@@ -157,11 +156,11 @@
             System.out.println("Result of invoke: " + boxval.intValue());
 
             System.out.println("Calling no-arg void-return method");
-            meth = target.getMethod("myNoargMethod", (Class[]) null);
+            meth = target.getMethod("myNoargMethod");
             meth.invoke(instance, (Object[]) null);
 
             /* try invoking a method that throws an exception */
-            meth = target.getMethod("throwingMethod", (Class[]) null);
+            meth = target.getMethod("throwingMethod");
             try {
                 meth.invoke(instance, (Object[]) null);
                 System.out.println("GLITCH: didn't throw");
@@ -372,7 +371,7 @@
             Target targ;
             Object[] args;
 
-            cons = target.getConstructor(new Class[] { int.class,float.class });
+            cons = target.getConstructor(int.class, float.class);
             args = new Object[] { new Integer(7), new Float(3.3333) };
             System.out.println("cons modifiers=" + cons.getModifiers());
             targ = cons.newInstance(args);
@@ -458,7 +457,7 @@
     public static void checkClinitForFields() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class FieldNoisyInitUser.class");
-      Class niuClass = FieldNoisyInitUser.class;
+      Class<?> niuClass = FieldNoisyInitUser.class;
       System.out.println("called const-class FieldNoisyInitUser.class");
 
       // Getting the declared fields doesn't run <clinit>.
@@ -480,14 +479,14 @@
     public static void checkClinitForMethods() throws Exception {
       // Loading a class constant shouldn't run <clinit>.
       System.out.println("calling const-class MethodNoisyInitUser.class");
-      Class niuClass = MethodNoisyInitUser.class;
+      Class<?> niuClass = MethodNoisyInitUser.class;
       System.out.println("called const-class MethodNoisyInitUser.class");
 
       // Getting the declared methods doesn't run <clinit>.
       Method[] methods = niuClass.getDeclaredMethods();
       System.out.println("got methods");
 
-      Method method = niuClass.getMethod("staticMethod", (Class[]) null);
+      Method method = niuClass.getMethod("staticMethod");
       System.out.println("got method");
       method.invoke(null);
       System.out.println("invoked method");
@@ -517,8 +516,7 @@
 
         Method method;
         try {
-            method = Main.class.getMethod("fancyMethod",
-                new Class[] { ArrayList.class });
+            method = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -527,9 +525,9 @@
         System.out.println("generic method " + method.getName() + " params='"
             + stringifyTypeArray(parmTypes) + "' ret='" + ret + "'");
 
-        Constructor ctor;
+        Constructor<?> ctor;
         try {
-            ctor = Main.class.getConstructor(new Class[] { ArrayList.class });
+            ctor = Main.class.getConstructor(ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
@@ -580,8 +578,8 @@
         }
         Method method1, method2;
         try {
-            method1 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
-            method2 = Main.class.getMethod("fancyMethod", new Class[] { ArrayList.class });
+            method1 = Main.class.getMethod("fancyMethod", ArrayList.class);
+            method2 = Main.class.getMethod("fancyMethod", ArrayList.class);
         } catch (NoSuchMethodException nsme) {
             throw new RuntimeException(nsme);
         }
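
One detail in the 046-reflect hunk above: "target" is now declared as Class<Target> rather than a raw Class, so reflective lookups on it keep the type parameter. Where that matters, Constructor<T>.newInstance can return the concrete type without a cast, roughly like this (Target here is a stand-in with an (int, float) constructor mirroring the test, not the real class):

    import java.lang.reflect.Constructor;

    class Target {
        public Target(int i, float f) {}
    }

    public class TypedConstructorSketch {
        public static void main(String[] args) throws Exception {
            Class<Target> klass = Target.class;
            // getConstructor on Class<Target> yields Constructor<Target>,
            // so newInstance returns a Target with no cast needed.
            Constructor<Target> cons = klass.getConstructor(int.class, float.class);
            Target t = cons.newInstance(7, 3.3f);
            System.out.println(t.getClass().getName());
        }
    }
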
diff --git a/test/051-thread/thread_test.cc b/test/051-thread/thread_test.cc
index 4215207..079ad40 100644
--- a/test/051-thread/thread_test.cc
+++ b/test/051-thread/thread_test.cc
@@ -28,7 +28,7 @@
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportsThreadPriorities(
     JNIEnv* env ATTRIBUTE_UNUSED,
     jclass clazz ATTRIBUTE_UNUSED) {
-#if defined(__ANDROID__)
+#if defined(ART_TARGET_ANDROID)
   return JNI_TRUE;
 #else
   return JNI_FALSE;
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 5d90129..50ad5b9 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -38,7 +38,7 @@
     }
 
     try {
-      Class c = Class.forName("SubClassUsingInaccessibleField");
+      Class<?> c = Class.forName("SubClassUsingInaccessibleField");
       Object o = c.newInstance();
       c.getMethod("test").invoke(o, null);
     } catch (InvocationTargetException ite) {
@@ -64,7 +64,7 @@
    * On success, the boxed value retrieved is returned.
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -638,7 +638,7 @@
    * reflection call is significant]
    */
   public Object getValue(Field field, Object obj, char type,
-      Class expectedException) {
+      Class<?> expectedException) {
     Object result = null;
     try {
       switch (type) {
@@ -698,7 +698,7 @@
     return result;
   }
 
-  public Object invoke(Method method, Object obj, Class expectedException) {
+  public Object invoke(Method method, Object obj, Class<?> expectedException) {
     Object result = null;
     try {
       result = method.invoke(obj);
diff --git a/test/068-classloader/src/FancyLoader.java b/test/068-classloader/src/FancyLoader.java
index 6a153cc..e616bfc 100644
--- a/test/068-classloader/src/FancyLoader.java
+++ b/test/068-classloader/src/FancyLoader.java
@@ -41,7 +41,7 @@
     static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -82,12 +82,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -111,8 +111,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -184,7 +183,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index b2d843b..01539b7 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -74,11 +74,10 @@
             /* this is the "alternate" DEX/Jar file */
             String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
             /* on Dalvik, this is a DexFile; otherwise, it's null */
-            Class mDexClass = Class.forName("dalvik.system.DexFile");
-            Constructor ctor = mDexClass.getConstructor(new Class[] {String.class});
+            Class<?> mDexClass = Class.forName("dalvik.system.DexFile");
+            Constructor<?> ctor = mDexClass.getConstructor(String.class);
             Object mDexFile = ctor.newInstance(DEX_FILE);
-            Method meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            Method meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
             Object klass = meth.invoke(mDexFile, "Mutator", null);
             if (klass == null) {
                 throw new AssertionError("loadClass with null class loader failed");
@@ -94,15 +93,15 @@
         FancyLoader loader2 = new FancyLoader(ClassLoader.getSystemClassLoader());
 
         try {
-            Class target1 = loader1.loadClass("MutationTarget");
-            Class target2 = loader2.loadClass("MutationTarget");
+            Class<?> target1 = loader1.loadClass("MutationTarget");
+            Class<?> target2 = loader2.loadClass("MutationTarget");
 
             if (target1 == target2) {
                 throw new RuntimeException("target1 should not be equal to target2");
             }
 
-            Class mutator1 = loader1.loadClass("Mutator");
-            Class mutator2 = loader2.loadClass("Mutator");
+            Class<?> mutator1 = loader1.loadClass("Mutator");
+            Class<?> mutator2 = loader2.loadClass("Mutator");
 
             if (mutator1 == mutator2) {
                 throw new RuntimeException("mutator1 should not be equal to mutator2");
@@ -134,12 +133,12 @@
         }
     }
 
-    private static void runMutator(Class c, int v) throws Exception {
+    private static void runMutator(Class<?> c, int v) throws Exception {
         java.lang.reflect.Method m = c.getDeclaredMethod("mutate", int.class);
         m.invoke(null, v);
     }
 
-    private static int getMutationTargetValue(Class c) throws Exception {
+    private static int getMutationTargetValue(Class<?> c) throws Exception {
         java.lang.reflect.Field f = c.getDeclaredField("value");
         return f.getInt(null);
     }
@@ -149,7 +148,7 @@
      * able to load it but not instantiate it.
      */
     static void testAccess1(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible1");
@@ -179,7 +178,7 @@
      * (though the base *is* accessible to us).
      */
     static void testAccess2(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible2");
@@ -199,7 +198,7 @@
      * See if we can load a class with an inaccessible interface.
      */
     static void testAccess3(ClassLoader loader) {
-        Class altClass;
+        Class<?> altClass;
 
         try {
             altClass = loader.loadClass("Inaccessible3");
@@ -219,7 +218,7 @@
      * Test a doubled class that extends the base class.
      */
     static void testExtend(ClassLoader loader) {
-        Class doubledExtendClass;
+        Class<?> doubledExtendClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtend */
@@ -268,7 +267,7 @@
      * it doesn't override the base class method.
      */
     static void testExtendOkay(ClassLoader loader) {
-        Class doubledExtendOkayClass;
+        Class<?> doubledExtendOkayClass;
         Object obj;
 
         /* get the "alternate" version of DoubledExtendOkay */
@@ -316,7 +315,7 @@
      * an interface declared in a different class.
      */
     static void testInterface(ClassLoader loader) {
-        Class getDoubledClass;
+        Class<?> getDoubledClass;
         Object obj;
 
         /* get GetDoubled from the "alternate" class loader */
@@ -362,7 +361,7 @@
      * Throw an abstract class into the middle and see what happens.
      */
     static void testAbstract(ClassLoader loader) {
-        Class abstractGetClass;
+        Class<?> abstractGetClass;
         Object obj;
 
         /* get AbstractGet from the "alternate" loader */
@@ -407,7 +406,7 @@
      * Test a doubled class that implements a common interface.
      */
     static void testImplement(ClassLoader loader) {
-        Class doubledImplementClass;
+        Class<?> doubledImplementClass;
         Object obj;
 
         useImplement(new DoubledImplement(), true);
@@ -465,7 +464,7 @@
      * that refers to a doubled class.
      */
     static void testIfaceImplement(ClassLoader loader) {
-        Class ifaceImplClass;
+        Class<?> ifaceImplClass;
         Object obj;
 
         /*
diff --git a/test/071-dexfile/src/Main.java b/test/071-dexfile/src/Main.java
index 2f85790..c3a9671 100644
--- a/test/071-dexfile/src/Main.java
+++ b/test/071-dexfile/src/Main.java
@@ -66,7 +66,7 @@
      */
     private static void testDexClassLoader() throws Exception {
         ClassLoader dexClassLoader = getDexClassLoader();
-        Class Another = dexClassLoader.loadClass("Another");
+        Class<?> Another = dexClassLoader.loadClass("Another");
         Object another = Another.newInstance();
         // not expected to work; just exercises the call
         dexClassLoader.getResource("nonexistent");
@@ -79,18 +79,21 @@
      */
     private static ClassLoader getDexClassLoader() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
-        Constructor DexClassLoader_init = DexClassLoader.getConstructor(String.class,
-                                                                        String.class,
-                                                                        String.class,
-                                                                        ClassLoader.class);
+        Class<?> DexClassLoader = classLoader.loadClass("dalvik.system.DexClassLoader");
+        Constructor<?> DexClassLoader_init = DexClassLoader.getConstructor(String.class,
+                                                                           String.class,
+                                                                           String.class,
+                                                                           ClassLoader.class);
         // create an instance, using the path we found
-        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH, getOdexDir(), LIB_DIR, classLoader);
+        return (ClassLoader) DexClassLoader_init.newInstance(CLASS_PATH,
+                                                             getOdexDir(),
+                                                             LIB_DIR,
+                                                             classLoader);
     }
 
     private static void testDexFile() throws Exception {
         ClassLoader classLoader = Main.class.getClassLoader();
-        Class DexFile = classLoader.loadClass("dalvik.system.DexFile");
+        Class<?> DexFile = classLoader.loadClass("dalvik.system.DexFile");
         Method DexFile_loadDex = DexFile.getMethod("loadDex",
                                                    String.class,
                                                    String.class,
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index f947d0b..df04793 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -69,7 +69,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -78,8 +78,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index f007b25..0ae92a9 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -105,7 +105,7 @@
     static boolean triggerReflectionOOM() {
         try {
             Class<?> c = Main.class;
-            Method m = c.getMethod("blowup", (Class[]) null);
+            Method m = c.getMethod("blowup");
             holder = new Object[1000000];
             m.invoke(null);
             holder = null;
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 9aaed9d..06f193a 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -808,10 +808,21 @@
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(Math.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(Math.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(Math.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(Math.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(Math.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(Math.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -832,10 +843,23 @@
     Assert.assertEquals(Math.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(Math.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(Math.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(Math.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(Math.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(Math.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(Math.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(Math.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(Math.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(Math.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(Math.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(Math.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
@@ -1143,10 +1167,21 @@
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
     Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(StrictMath.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(StrictMath.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(StrictMath.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(StrictMath.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(StrictMath.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(StrictMath.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(StrictMath.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -1167,10 +1202,23 @@
     Assert.assertEquals(StrictMath.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(StrictMath.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(StrictMath.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(StrictMath.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(StrictMath.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(StrictMath.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(StrictMath.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(StrictMath.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(StrictMath.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(StrictMath.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(StrictMath.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(StrictMath.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
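
The new Math.round/StrictMath.round assertions probe representation boundaries: every double with magnitude at or above 2^52 (and every float at or above 2^23) is already a whole number, ".5" values just below those boundaries are the last ones that can be represented exactly, and results beyond the long/int range are clamped to MAX_VALUE/MIN_VALUE. Ties round toward positive infinity, which is why -4503599627370495.5 rounds to -4503599627370495. A small sketch of the double case, assuming nothing beyond java.lang.Math:

    public class RoundBoundarySketch {
        public static void main(String[] args) {
            double twoPow52 = 0x1p52;   // 4503599627370496.0
            // At or above 2^52 every double is an exact integer: round(x) == (long) x.
            System.out.println(Math.round(twoPow52) == (long) twoPow52);            // true
            // 2^52 - 0.5 is still exactly representable and rounds half up.
            System.out.println(Math.round(twoPow52 - 0.5) == (long) twoPow52);      // true
            // Negative ties also round toward positive infinity.
            System.out.println(Math.round(-(twoPow52 - 0.5)) == 1 - (long) twoPow52); // true
        }
    }
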
diff --git a/test/086-null-super/src/Main.java b/test/086-null-super/src/Main.java
index 060737f..8bd1786 100644
--- a/test/086-null-super/src/Main.java
+++ b/test/086-null-super/src/Main.java
@@ -75,14 +75,12 @@
                  * Find the DexFile class, and construct a DexFile object
                  * through reflection, then call loadCLass on it.
                  */
-                Class mDexClass = ClassLoader.getSystemClassLoader().
+                Class<?> mDexClass = ClassLoader.getSystemClassLoader().
                         loadClass("dalvik.system.DexFile");
-                Constructor ctor = mDexClass.
-                        getConstructor(new Class[] {String.class});
+                Constructor<?> ctor = mDexClass.getConstructor(String.class);
                 Object mDexFile = ctor.newInstance(DEX_FILE);
                 Method meth = mDexClass.
-                        getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                        getMethod("loadClass", String.class, ClassLoader.class);
                 /*
                  * Invoking loadClass on CLASS_NAME is expected to
                  * throw an InvocationTargetException. Anything else
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index 7c47e99..698af0b 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -70,7 +70,7 @@
                 throws TestFailed, InvocationTargetException
         {
             Object dexFile = null;
-            Class dexClass = null;
+            Class<?> dexClass = null;
 
             try {
                 try {
@@ -80,11 +80,9 @@
                      */
                     dexClass = ClassLoader.getSystemClassLoader().
                             loadClass("dalvik.system.DexFile");
-                    Constructor ctor = dexClass.
-                            getConstructor(new Class[] {String.class});
+                    Constructor<?> ctor = dexClass.getConstructor(String.class);
                     dexFile = ctor.newInstance(DEX_FILE);
-                    Method meth = dexClass.getMethod("loadClass",
-                            new Class[] { String.class, ClassLoader.class });
+                    Method meth = dexClass.getMethod("loadClass", String.class, ClassLoader.class);
                     /*
                      * Invoking loadClass on CLASS_NAME is expected to
                      * throw an InvocationTargetException. Anything else
@@ -95,7 +93,7 @@
                 } finally {
                     if (dexFile != null) {
                         /* close the DexFile to make CloseGuard happy */
-                        Method meth = dexClass.getMethod("close", (Class[]) null);
+                        Method meth = dexClass.getMethod("close");
                         meth.invoke(dexFile);
                     }
                 }
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 212c894..a6f0e64 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -100,7 +100,7 @@
      */
     void constantLock() {
         assertIsManaged();
-        Class thing = Thread.class;
+        Class<?> thing = Thread.class;
         synchronized (Thread.class) {}
     }
 
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index f41ff2a..72c5a28 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,12 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new Object();
+            new Object() {
+                // Add a finalizer so that the allocation won't be eliminated.
+                public void finalize() {
+                    System.out.print("");
+                }
+            };
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
@@ -131,7 +136,7 @@
         private static final Method getRecentAllocationsMethod;
         static {
             try {
-                Class c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+                Class<?> c = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
                 enableRecentAllocationsMethod = c.getDeclaredMethod("enableRecentAllocations",
                                                                     Boolean.TYPE);
                 getRecentAllocationStatusMethod = c.getDeclaredMethod("getRecentAllocationStatus");
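
The anonymous subclass with a finalizer in the 098-ddmc hunk exists so that the optimizing compiler cannot eliminate the "new Object()" allocation inside the loop; an eliminated allocation would never reach the DDM allocation tracker, and the overflow check would then test nothing. The keep-alive idiom in isolation (class name AllocationSpinner is made up):

    public class AllocationSpinner {
        public static void main(String[] args) {
            final int iterations = 64 * 1024;
            for (int i = 0; i < iterations; i++) {
                // Overriding finalize() makes the object observable, so the
                // allocation cannot simply be optimized away.
                new Object() {
                    protected void finalize() {
                        System.out.print("");
                    }
                };
            }
        }
    }
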
diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java
index 1be5765..90ad315 100644
--- a/test/099-vmdebug/src/Main.java
+++ b/test/099-vmdebug/src/Main.java
@@ -133,7 +133,7 @@
             System.out.println("Got null string");
             return;
         }
-        long n = Long.valueOf(s);
+        long n = Long.parseLong(s);
         if (n < 0) {
             System.out.println("Got negative number " + n);
         }
@@ -157,8 +157,8 @@
                 System.out.println("Got bad bucket " + bucket);
                 continue;
             }
-            long key = Long.valueOf(kv[0]);
-            long value = Long.valueOf(kv[1]);
+            long key = Long.parseLong(kv[0]);
+            long value = Long.parseLong(kv[1]);
             if (key < 0 || value < 0) {
                 System.out.println("Got negative key or value " + bucket);
                 continue;
@@ -242,7 +242,7 @@
         System.out.println("Instances of null " + VMDebug.countInstancesofClass(null, false));
         System.out.println("Instances of ClassA assignable " +
                 VMDebug.countInstancesofClass(ClassA.class, true));
-        Class[] classes = new Class[]{ClassA.class, ClassB.class, null};
+        Class<?>[] classes = new Class<?>[] {ClassA.class, ClassB.class, null};
         long[] counts = VMDebug.countInstancesofClasses(classes, false);
         System.out.println("Array counts " + Arrays.toString(counts));
         counts = VMDebug.countInstancesofClasses(classes, true);
@@ -259,7 +259,7 @@
         private static final Method countInstancesOfClassesMethod;
         static {
             try {
-                Class c = Class.forName("dalvik.system.VMDebug");
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
                 startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
                         Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
                 stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
@@ -292,10 +292,10 @@
         public static Map<String, String> getRuntimeStats() throws Exception {
             return (Map<String, String>) getRuntimeStatsMethod.invoke(null);
         }
-        public static long countInstancesofClass(Class c, boolean assignable) throws Exception {
+        public static long countInstancesofClass(Class<?> c, boolean assignable) throws Exception {
             return (long) countInstancesOfClassMethod.invoke(null, new Object[]{c, assignable});
         }
-        public static long[] countInstancesofClasses(Class[] classes, boolean assignable)
+        public static long[] countInstancesofClasses(Class<?>[] classes, boolean assignable)
                 throws Exception {
             return (long[]) countInstancesOfClassesMethod.invoke(
                     null, new Object[]{classes, assignable});
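
The Long.valueOf -> Long.parseLong switches in 099-vmdebug are a small cleanup: parseLong returns the primitive directly, whereas valueOf returns a boxed Long that is immediately auto-unboxed into the long local. A trivial illustration:

    public class ParseLongSketch {
        public static void main(String[] args) {
            String s = "12345";
            long viaParse = Long.parseLong(s);   // primitive, no boxing
            long viaValueOf = Long.valueOf(s);   // boxes to a Long, then auto-unboxes
            System.out.println(viaParse == viaValueOf);
        }
    }
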
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index d878e69..dd89d64 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -33,7 +33,7 @@
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
 [private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String 
java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), 
public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 1245852..91ba307 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -275,10 +275,8 @@
   }
 
   public static void testConstructorReflection() throws Exception {
-    Constructor<?> ctor;
-
-    ctor = String.class.getConstructor(new Class[0]);
-    show(ctor.newInstance((Object[]) null));
+    Constructor<String> ctor = String.class.getConstructor();
+    show(ctor.newInstance());
 
     ctor = String.class.getConstructor(char[].class, int.class, int.class);
     show(ctor.newInstance(new char[] { '\u2714', 'y', 'z', '!' }, 1, 2));
@@ -287,7 +285,7 @@
   private static void testPackagePrivateConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.newInstance();
       throw new RuntimeException("Expected IllegalAccessException.");
     } catch (IllegalAccessException e) {
@@ -301,7 +299,7 @@
   private static void testPackagePrivateAccessibleConstructor() {
     try {
       Class<?> c = Class.forName("sub.PPClass");
-      Constructor cons = c.getConstructor();
+      Constructor<?> cons = c.getConstructor();
       cons.setAccessible(true);  // ensure we prevent IllegalAccessException
       cons.newInstance();
     } catch (Exception e) {
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index 0c91d44..ec5678d 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -104,7 +104,7 @@
     }
 
     static int constClassTest(int x) {
-        Class c = String.class;
+        Class<?> c = String.class;
         if (c != null) {
            return x * 2;
         } else {
diff --git a/test/118-noimage-dex2oat/src/Main.java b/test/118-noimage-dex2oat/src/Main.java
index dba9166..cc19107 100644
--- a/test/118-noimage-dex2oat/src/Main.java
+++ b/test/118-noimage-dex2oat/src/Main.java
@@ -51,7 +51,7 @@
     private static final Method isBootClassPathOnDiskMethod;
     static {
         try {
-            Class c = Class.forName("dalvik.system.VMRuntime");
+            Class<?> c = Class.forName("dalvik.system.VMRuntime");
             getCurrentInstructionSetMethod = c.getDeclaredMethod("getCurrentInstructionSet");
             isBootClassPathOnDiskMethod = c.getDeclaredMethod("isBootClassPathOnDisk",
                                                               String.class);
diff --git a/test/125-gc-and-classloading/src/Main.java b/test/125-gc-and-classloading/src/Main.java
index 61e123d..e81ef7b 100644
--- a/test/125-gc-and-classloading/src/Main.java
+++ b/test/125-gc-and-classloading/src/Main.java
@@ -57,7 +57,7 @@
         public void run() {
             try {
                 cdl.await();
-                Class c0 = Class.forName("Main$BigClass");
+                Class<?> c0 = Class.forName("Main$BigClass");
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 9868c61..c145f27 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -37,15 +37,15 @@
 
     private static Object allocInDifferentLoader() throws Exception {
         final String DEX_FILE = System.getenv("DEX_LOCATION") + "/130-hprof-ex.jar";
-        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
         if (pathClassLoader == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
-        Constructor constructor =
+        Constructor<?> constructor =
             pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
         ClassLoader loader = (ClassLoader)constructor.newInstance(
                 DEX_FILE, ClassLoader.getSystemClassLoader());
-        Class allocator = loader.loadClass("Allocator");
+        Class<?> allocator = loader.loadClass("Allocator");
         return allocator.getDeclaredMethod("allocObject", null).invoke(null);
     }
 
@@ -105,7 +105,7 @@
         System.out.println("Generated data.");
 
         createDumpAndConv();
-        Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+        Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
         if (klass == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
@@ -153,7 +153,7 @@
      */
     private static Method getDumpHprofDataMethod() {
         ClassLoader myLoader = Main.class.getClassLoader();
-        Class vmdClass;
+        Class<?> vmdClass;
         try {
             vmdClass = myLoader.loadClass("dalvik.system.VMDebug");
         } catch (ClassNotFoundException cnfe) {
@@ -162,8 +162,7 @@
 
         Method meth;
         try {
-            meth = vmdClass.getMethod("dumpHprofData",
-                    new Class[] { String.class });
+            meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
             System.err.println("Found VMDebug but not dumpHprofData method");
             return null;
diff --git a/test/134-reg-promotion/src/Main.java b/test/134-reg-promotion/src/Main.java
index 008ac58..f633524 100644
--- a/test/134-reg-promotion/src/Main.java
+++ b/test/134-reg-promotion/src/Main.java
@@ -32,13 +32,13 @@
 
     public static void main(String args[]) throws Exception {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("run", (Class[]) null);
+        Method m = c.getMethod("run");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
             holder = null;
         }
-        m = c.getMethod("run2", (Class[]) null);
+        m = c.getMethod("run2");
         for (int i = 0; i < 10; i++) {
             holder = new char[128 * 1024][];
             m.invoke(null, (Object[]) null);
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index c9110a9..b729301 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -27,8 +27,20 @@
 namespace {
 
 static volatile std::atomic<bool> vm_was_shutdown(false);
+static const int kThreadCount = 4;
+
+static std::atomic<int> barrier_count(kThreadCount + 1);
+
+static void JniThreadBarrierWait() {
+  barrier_count--;
+  while (barrier_count.load() != 0) {
+    usleep(1000);
+  }
+}
 
 extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Wait until the runtime is shutdown.
   while (!vm_was_shutdown.load()) {
     usleep(1000);
@@ -40,6 +52,8 @@
 
 // NO_RETURN does not work with extern "C" for target builds.
 extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Fake up the managed stack so we can detach.
   Thread* const self = Thread::Current();
   self->SetTopOfStack(nullptr);
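
The native change above adds a decrement-and-spin barrier so that every daemon thread reaches JNI before the VM is torn down. The same pattern, sketched in Java with AtomicInteger purely for illustration (the actual test keeps this logic on the native side):

    import java.util.concurrent.atomic.AtomicInteger;

    public class SpinBarrierDemo {
        static final int THREAD_COUNT = 4;
        // One slot per worker plus one for the coordinator, mirroring kThreadCount + 1.
        static final AtomicInteger remaining = new AtomicInteger(THREAD_COUNT + 1);

        static void await() throws InterruptedException {
            remaining.decrementAndGet();
            while (remaining.get() != 0) {
                Thread.sleep(1);  // Roughly usleep(1000) in the C++ version.
            }
        }

        public static void main(String[] args) throws Exception {
            Thread[] workers = new Thread[THREAD_COUNT];
            for (int i = 0; i < workers.length; i++) {
                workers[i] = new Thread(() -> {
                    try {
                        await();
                        System.out.println(Thread.currentThread().getName() + " passed the barrier");
                    } catch (InterruptedException ignored) {
                    }
                });
                workers[i].start();
            }
            await();  // The coordinating thread joins the barrier as well.
            for (Thread t : workers) {
                t.join();
            }
        }
    }
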
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index a2ef281..5ffceb9 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -38,7 +38,7 @@
                 getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/138-duplicate-classes-check2/src/FancyLoader.java b/test/138-duplicate-classes-check2/src/FancyLoader.java
index 7e2bb08..58b7ec4 100644
--- a/test/138-duplicate-classes-check2/src/FancyLoader.java
+++ b/test/138-duplicate-classes-check2/src/FancyLoader.java
@@ -42,7 +42,7 @@
             "/138-duplicate-classes-check2-ex.jar";
 
     /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
+    private Class<?> mDexClass;
 
     private Object mDexFile;
 
@@ -83,12 +83,12 @@
 
         if (mDexFile == null) {
             synchronized (FancyLoader.class) {
-                Constructor ctor;
+                Constructor<?> ctor;
                 /*
                  * Construct a DexFile object through reflection.
                  */
                 try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                    ctor = mDexClass.getConstructor(String.class);
                 } catch (NoSuchMethodException nsme) {
                     throw new ClassNotFoundException("getConstructor failed",
                         nsme);
@@ -112,8 +112,7 @@
         Method meth;
 
         try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
         } catch (NoSuchMethodException nsme) {
             throw new ClassNotFoundException("getMethod failed", nsme);
         }
@@ -185,7 +184,7 @@
     protected Class<?> loadClass(String name, boolean resolve)
         throws ClassNotFoundException
     {
-        Class res;
+        Class<?> res;
 
         /*
          * 1. Invoke findLoadedClass(String) to check if the class has
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
index a9b5bb0..a0d6977 100644
--- a/test/138-duplicate-classes-check2/src/Main.java
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -33,7 +33,7 @@
         FancyLoader loader = new FancyLoader(getClass().getClassLoader());
 
         try {
-            Class testEx = loader.loadClass("TestEx");
+            Class<?> testEx = loader.loadClass("TestEx");
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
diff --git a/test/139-register-natives/src/Main.java b/test/139-register-natives/src/Main.java
index 8dd2131..11bd53f 100644
--- a/test/139-register-natives/src/Main.java
+++ b/test/139-register-natives/src/Main.java
@@ -47,7 +47,7 @@
     }
   }
 
-  private native static int registerNatives(Class c);
+  private native static int registerNatives(Class<?> c);
 
   private static void expectThrows(Base b) {
     try {
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 11de660..2b77b29 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -12,7 +12,6 @@
 JNI_OnUnload called
 null
 loader null false
-loader null false
 JNI_OnLoad called
 JNI_OnUnload called
 null
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 17a6049..f9b6180 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -28,17 +28,15 @@
 
     public static void main(String[] args) throws Exception {
         nativeLibraryName = args[0];
-        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
         if (pathClassLoader == null) {
             throw new AssertionError("Couldn't find path class loader class");
         }
-        Constructor constructor =
+        Constructor<?> constructor =
             pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
         try {
             testUnloadClass(constructor);
             testUnloadLoader(constructor);
-            // Test that we don't unload if we have a Method keeping the class live.
-            testNoUnloadInvoke(constructor);
             // Test that we don't unload if we have an instance.
             testNoUnloadInstance(constructor);
             // Test JNI_OnLoad and JNI_OnUnload.
@@ -69,7 +67,7 @@
         System.out.println("Number of loaded unload-ex maps " + count);
     }
 
-    private static void stressTest(Constructor constructor) throws Exception {
+    private static void stressTest(Constructor<?> constructor) throws Exception {
         for (int i = 0; i <= 100; ++i) {
             setUpUnloadLoader(constructor, false);
             if (i % 10 == 0) {
@@ -78,18 +76,18 @@
         }
     }
 
-    private static void testUnloadClass(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
+    private static void testUnloadClass(Constructor<?> constructor) throws Exception {
+        WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
-        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        WeakReference<Class> klass2 = setUpUnloadClassWeak(constructor);
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(klass.get());
         System.out.println(klass2.get());
     }
 
-    private static void testUnloadLoader(Constructor constructor)
+    private static void testUnloadLoader(Constructor<?> constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
       // No strong references to class loader, should get unloaded.
@@ -98,17 +96,19 @@
       System.out.println(loader.get());
     }
 
-    private static void testStackTrace(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
-        Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace");
-        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get());
+    private static void testStackTrace(Constructor<?> constructor) throws Exception {
+        Class<?> klass = setUpUnloadClass(constructor);
+        WeakReference<Class> weak_klass = new WeakReference(klass);
+        Method stackTraceMethod = klass.getDeclaredMethod("generateStackTrace");
+        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
         stackTraceMethod = null;
+        klass = null;
         Runtime.getRuntime().gc();
-        boolean isNull = klass.get() == null;
+        boolean isNull = weak_klass.get() == null;
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
 
-    private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
+    private static void testLoadAndUnloadLibrary(Constructor<?> constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
@@ -116,31 +116,40 @@
         System.out.println(loader.get());
     }
 
-    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
+    private static Object testNoUnloadHelper(ClassLoader loader) throws Exception {
+        Class<?> intHolder = loader.loadClass("IntHolder");
+        return intHolder.newInstance();
     }
 
-    private static void testNoUnloadInstance(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        Object o = intHolder.get().newInstance();
-        Runtime.getRuntime().gc();
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
+    static class Pair {
+      public Pair(Object o, ClassLoader l) {
+        object = o;
+        classLoader = new WeakReference<ClassLoader>(l);
+      }
+
+      public Object object;
+      public WeakReference<ClassLoader> classLoader;
     }
 
-    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+    private static Pair testNoUnloadInstanceHelper(Constructor<?> constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Object o = testNoUnloadHelper(loader);
+        return new Pair(o, loader);
+    }
+
+    private static void testNoUnloadInstance(Constructor<?> constructor) throws Exception {
+        Pair p = testNoUnloadInstanceHelper(constructor);
+        Runtime.getRuntime().gc();
+        // If the class loader was unloaded too early due to races, just pass the test.
+        boolean isNull = p.classLoader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static Class<?> setUpUnloadClass(Constructor<?> constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method getValue = intHolder.getDeclaredMethod("getValue");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         // Make sure we don't accidentally preserve the value in the int holder, the class
@@ -149,15 +158,20 @@
         setValue.invoke(intHolder, 2);
         System.out.println((int) getValue.invoke(intHolder));
         waitForCompilation(intHolder);
-        return new WeakReference(intHolder);
+        return intHolder;
     }
 
-    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
+    private static WeakReference<Class> setUpUnloadClassWeak(Constructor<?> constructor)
+            throws Exception {
+        return new WeakReference<Class>(setUpUnloadClass(constructor));
+    }
+
+    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor<?> constructor,
                                                                 boolean waitForCompilation)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         setValue.invoke(intHolder, 2);
         if (waitForCompilation) {
@@ -166,7 +180,7 @@
         return new WeakReference(loader);
     }
 
-    private static void waitForCompilation(Class intHolder) throws Exception {
+    private static void waitForCompilation(Class<?> intHolder) throws Exception {
       // Load the native library so that we can call waitForCompilation.
       Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
       loadLibrary.invoke(intHolder, nativeLibraryName);
@@ -175,11 +189,11 @@
       waitForCompilation.invoke(intHolder);
     }
 
-    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
+    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor<?> constructor)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
-        Class intHolder = loader.loadClass("IntHolder");
+        Class<?> intHolder = loader.loadClass("IntHolder");
         Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
         loadLibrary.invoke(intHolder, nativeLibraryName);
         waitForCompilation(intHolder);
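
The reworked helpers in 141-class-unload all follow one idiom: keep the loaded class reachable only through a local, null the strong reference, force a GC, and then observe whether a WeakReference was cleared. A minimal sketch of that idiom using a plain object, since demonstrating an actual class unload would require a separate dex/jar as in the test:

    import java.lang.ref.WeakReference;

    public class WeakClearDemo {
        public static void main(String[] args) {
            Object payload = new Object();
            WeakReference<Object> weak = new WeakReference<>(payload);

            // While a strong reference exists, GC must not clear the weak one.
            System.gc();
            System.out.println("before clearing strong ref: " + (weak.get() == null));

            // Drop the strong reference, as testStackTrace() does with 'klass = null',
            // then check whether the referent was collected. Note that System.gc()
            // is only a hint, so this may still print false on some VMs.
            payload = null;
            System.gc();
            System.out.println("after clearing strong ref:  " + (weak.get() == null));
        }
    }
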
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index 89dadce..80b00e7 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -25,8 +25,8 @@
     private static ClassLoader createClassLoader(String dexPath, ClassLoader parent) {
         try {
             Class<?> myClassLoaderClass = Class.forName("MyPathClassLoader");
-            Constructor constructor = myClassLoaderClass.getConstructor(String.class,
-                                                                        ClassLoader.class);
+            Constructor<?> constructor = myClassLoaderClass.getConstructor(String.class,
+                                                                           ClassLoader.class);
             return (ClassLoader)constructor.newInstance(dexPath, parent);
         } catch (Exception e) {
             // Ups, not available?!?!
diff --git a/test/145-alloc-tracking-stress/src/Main.java b/test/145-alloc-tracking-stress/src/Main.java
index 752fdd9..4a67a80 100644
--- a/test/145-alloc-tracking-stress/src/Main.java
+++ b/test/145-alloc-tracking-stress/src/Main.java
@@ -31,7 +31,7 @@
     }
 
     public static void main(String[] args) throws Exception {
-      Class klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
+      Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
       if (klass == null) {
           throw new AssertionError("Couldn't find DdmVmInternal class");
       }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass1.java b/test/148-multithread-gc-annotations/src/AnnoClass1.java
index b82c61f..3eb45ae 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass1.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass1.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass1 {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass2.java b/test/148-multithread-gc-annotations/src/AnnoClass2.java
index c75d950..b17490f 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass2.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass2.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass2 {
-    Class value();
+    Class<?> value();
 }
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass3.java b/test/148-multithread-gc-annotations/src/AnnoClass3.java
index 5b4a378..7d600a8 100644
--- a/test/148-multithread-gc-annotations/src/AnnoClass3.java
+++ b/test/148-multithread-gc-annotations/src/AnnoClass3.java
@@ -19,5 +19,5 @@
 @Retention(RetentionPolicy.RUNTIME)
 @Target(ElementType.TYPE)
 public @interface AnnoClass3 {
-    Class value();
+    Class<?> value();
 }
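
The AnnoClassN changes only parameterize the annotation member as Class<?>, which removes the raw-type warning without changing what is stored. A small self-contained example of declaring such a member and reading it back at runtime; all names here are illustrative:

    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;

    public class AnnoValueDemo {
        @Retention(RetentionPolicy.RUNTIME)
        @interface Anno {
            Class<?> value();  // Wildcard instead of the raw 'Class'.
        }

        @Anno(String.class)
        static class Annotated {}

        public static void main(String[] args) {
            Class<?> stored = Annotated.class.getAnnotation(Anno.class).value();
            System.out.println(stored.getName());  // prints java.lang.String
        }
    }
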
diff --git a/test/955-lambda-smali/run b/test/149-suspend-all-stress/check
similarity index 75%
rename from test/955-lambda-smali/run
rename to test/149-suspend-all-stress/check
index 2fb2f89..d30b888 100755
--- a/test/955-lambda-smali/run
+++ b/test/149-suspend-all-stress/check
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+# Only compare the last line.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/149-suspend-all-stress/expected.txt b/test/149-suspend-all-stress/expected.txt
new file mode 100644
index 0000000..134d8d0
--- /dev/null
+++ b/test/149-suspend-all-stress/expected.txt
@@ -0,0 +1 @@
+Finishing
diff --git a/test/149-suspend-all-stress/info.txt b/test/149-suspend-all-stress/info.txt
new file mode 100644
index 0000000..29b414c
--- /dev/null
+++ b/test/149-suspend-all-stress/info.txt
@@ -0,0 +1 @@
+Stress test for multiple threads calling SuspendAll
diff --git a/test/149-suspend-all-stress/src/Main.java b/test/149-suspend-all-stress/src/Main.java
new file mode 100644
index 0000000..6a27c4b
--- /dev/null
+++ b/test/149-suspend-all-stress/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 8;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        final Thread[] threads = new Thread[numberOfThreads];
+        for (int t = 0; t < threads.length; t++) {
+            threads[t] = new Thread(new Main());
+            threads[t].start();
+        }
+        for (Thread t : threads) {
+            t.join();
+        }
+        System.out.println("Finishing");
+    }
+
+    public void run() {
+        suspendAndResume();
+    }
+
+    private static native void suspendAndResume();
+}
diff --git a/test/149-suspend-all-stress/suspend_all.cc b/test/149-suspend-all-stress/suspend_all.cc
new file mode 100644
index 0000000..dfd944a
--- /dev/null
+++ b/test/149-suspend-all-stress/suspend_all.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/time_utils.h"
+#include "jni.h"
+#include "runtime.h"
+#include "thread_list.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_suspendAndResume(JNIEnv*, jclass) {
+  static constexpr size_t kInitialSleepUS = 100 * 1000;  // 100ms.
+  usleep(kInitialSleepUS);  // Leave some time for threads to get in here before we start suspending.
+  enum Operation {
+    kOPSuspendAll,
+    kOPDumpStack,
+    kOPSuspendAllDumpStack,
+    // Total number of operations.
+    kOPNumber,
+  };
+  const uint64_t start_time = NanoTime();
+  size_t iterations = 0;
+  // Run for a fixed period of 10 seconds.
+  while (NanoTime() - start_time < MsToNs(10 * 1000)) {
+    switch (static_cast<Operation>(iterations % kOPNumber)) {
+      case kOPSuspendAll: {
+        ScopedSuspendAll ssa(__FUNCTION__);
+        usleep(500);
+        break;
+      }
+      case kOPDumpStack: {
+        Runtime::Current()->GetThreadList()->Dump(LOG(INFO));
+        usleep(500);
+        break;
+      }
+      case kOPSuspendAllDumpStack: {
+        // Not yet supported.
+        // ScopedSuspendAll ssa(__FUNCTION__);
+        // Runtime::Current()->GetThreadList()->Dump(LOG(INFO));
+        break;
+      }
+      case kOPNumber:
+        break;
+    }
+    ++iterations;
+  }
+  LOG(INFO) << "Did " << iterations << " iterations";
+}
+
+}  // namespace art
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 52d4259..dc58819 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -247,7 +247,7 @@
    * Helper for testCastOperatorWithArrays. It's important that
    * the return type is Object.
    */
-  private static Object makeArray(Class c) {
+  private static Object makeArray(Class<?> c) {
     return Array.newInstance(c, 1);
   }
 
@@ -461,7 +461,7 @@
       "hello there".substring(9,14);
       fail();
     } catch (StringIndexOutOfBoundsException ex) {
-      assertEquals("length=11; regionStart=9; regionLength=5", ex.getMessage());
+      assertEquals("length=11; index=14", ex.getMessage());
     }
   }
 }
diff --git a/test/412-new-array/info.txt b/test/412-new-array/info.txt
index cb388b6..b5f834a 100644
--- a/test/412-new-array/info.txt
+++ b/test/412-new-array/info.txt
@@ -1 +1,3 @@
 Simple tests for new-array, filled-new-array and fill-array-data.
+Regression test for the arm64 mterp miscalculating the fill-array-data-payload
+address, zero-extending a register instead of sign-extending.
diff --git a/test/412-new-array/smali/fill_array_data.smali b/test/412-new-array/smali/fill_array_data.smali
index 34776db..2b24e56 100644
--- a/test/412-new-array/smali/fill_array_data.smali
+++ b/test/412-new-array/smali/fill_array_data.smali
@@ -15,6 +15,21 @@
 
 .end method
 
+.method public static intArrayFillInstructionAfterData([I)V
+   .registers 1
+   goto :FillInstruction
+
+:ArrayData
+    .array-data 4
+        1 2 3 4 5
+    .end array-data
+
+:FillInstruction
+   fill-array-data v0, :ArrayData
+   return-void
+
+.end method
+
 .method public static shortArray([S)V
    .registers 1
 
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index b9c2a05..d95d2c5 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -259,6 +259,45 @@
     }
 
     {
+      Method m = c.getMethod("intArrayFillInstructionAfterData", int[].class);
+      int[] array = new int[7];
+      Object[] args = { array };
+      m.invoke(null, args);
+      assertEquals(7, array.length);
+      assertEquals(1, array[0]);
+      assertEquals(2, array[1]);
+      assertEquals(3, array[2]);
+      assertEquals(4, array[3]);
+      assertEquals(5, array[4]);
+      assertEquals(0, array[5]);
+      assertEquals(0, array[6]);
+
+      array = new int[2];
+      args[0] = array;
+      Throwable exception = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof IndexOutOfBoundsException);
+      }
+      assertNotNull(exception);
+      exception = null;
+      // Test that nothing has been written to the array.
+      assertEquals(0, array[0]);
+      assertEquals(0, array[1]);
+
+      args[0] = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof NullPointerException);
+      }
+      assertNotNull(exception);
+    }
+
+    {
       Method m = c.getMethod("shortArray", short[].class);
       short[] array = new short[7];
       Object[] args = { array };
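
The new intArrayFillInstructionAfterData checks rely on the usual reflection convention: an exception thrown inside the invoked method surfaces as the cause of an InvocationTargetException. A compact, hypothetical illustration of asserting on that cause (not part of the test itself):

    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Method;

    public class CauseCheckDemo {
        // Stand-in for a test method that throws, like fill-array-data on a null array.
        public static void boom(int[] array) {
            array[0] = 1;  // NullPointerException when array is null.
        }

        public static void main(String[] args) throws Exception {
            Method m = CauseCheckDemo.class.getMethod("boom", int[].class);
            Throwable cause = null;
            try {
                m.invoke(null, new Object[] { null });
            } catch (InvocationTargetException e) {
                cause = e.getCause();  // The real exception thrown by boom().
            }
            System.out.println(cause instanceof NullPointerException);  // true
        }
    }
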
diff --git a/test/420-const-class/src/Main.java b/test/420-const-class/src/Main.java
index 44a7436..90ccf3a 100644
--- a/test/420-const-class/src/Main.java
+++ b/test/420-const-class/src/Main.java
@@ -53,15 +53,15 @@
     $opt$LoadAndClinitCheck();
   }
 
-  public static Class $opt$LoadThisClass() {
+  public static Class<?> $opt$LoadThisClass() {
     return Main.class;
   }
 
-  public static Class $opt$LoadOtherClass() {
+  public static Class<?> $opt$LoadOtherClass() {
     return Other.class;
   }
 
-  public static Class $opt$LoadSystemClass() {
+  public static Class<?> $opt$LoadSystemClass() {
     return System.class;
   }
 
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index b7712a7..33ef10b 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -213,17 +213,17 @@
    * Exercise constant folding on addition.
    */
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: int Main.IntAddition1() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition1() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition1() {
@@ -234,7 +234,7 @@
     return c;
   }
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
@@ -244,11 +244,11 @@
   /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                      Return [<<Add3>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const14:i\d+>> IntConstant 14
   /// CHECK-DAG:                      Return [<<Const14>>]
 
-  /// CHECK-START: int Main.IntAddition2() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntAddition2() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static int IntAddition2() {
@@ -263,17 +263,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: long Main.LongAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static long LongAddition() {
@@ -284,17 +284,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:f\d+>>  FloatConstant 1
   /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
   /// CHECK-DAG:     <<Add:f\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: float Main.FloatAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static float FloatAddition() {
@@ -305,17 +305,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:d\d+>>  DoubleConstant 1
   /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
   /// CHECK-DAG:     <<Add:d\d+>>     Add [<<Const1>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
   /// CHECK-DAG:                      Return [<<Const3>>]
 
-  /// CHECK-START: double Main.DoubleAddition() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleAddition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
 
   public static double DoubleAddition() {
@@ -331,17 +331,17 @@
    * Exercise constant folding on subtraction.
    */
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>  IntConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: int Main.IntSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static int IntSubtraction() {
@@ -352,17 +352,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:j\d+>>  LongConstant 6
   /// CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<Sub:j\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:j\d+>>  LongConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: long Main.LongSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static long LongSubtraction() {
@@ -373,17 +373,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:f\d+>>  FloatConstant 6
   /// CHECK-DAG:     <<Const2:f\d+>>  FloatConstant 2
   /// CHECK-DAG:     <<Sub:f\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:f\d+>>  FloatConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: float Main.FloatSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static float FloatSubtraction() {
@@ -394,17 +394,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const6:d\d+>>  DoubleConstant 6
   /// CHECK-DAG:     <<Const2:d\d+>>  DoubleConstant 2
   /// CHECK-DAG:     <<Sub:d\d+>>     Sub [<<Const6>>,<<Const2>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:d\d+>>  DoubleConstant 4
   /// CHECK-DAG:                      Return [<<Const4>>]
 
-  /// CHECK-START: double Main.DoubleSubtraction() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleSubtraction() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Sub
 
   public static double DoubleSubtraction() {
@@ -420,17 +420,17 @@
    * Exercise constant folding on multiplication.
    */
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Mul:i\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:i\d+>> IntConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: int Main.IntMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int IntMultiplication() {
@@ -441,17 +441,17 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:j\d+>>  LongConstant 7
   /// CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:j\d+>> LongConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: long Main.LongMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static long LongMultiplication() {
@@ -462,17 +462,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:f\d+>>  FloatConstant 7
   /// CHECK-DAG:     <<Const3:f\d+>>  FloatConstant 3
   /// CHECK-DAG:     <<Mul:f\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:f\d+>> FloatConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: float Main.FloatMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static float FloatMultiplication() {
@@ -483,17 +483,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:d\d+>>  DoubleConstant 7
   /// CHECK-DAG:     <<Const3:d\d+>>  DoubleConstant 3
   /// CHECK-DAG:     <<Mul:d\d+>>     Mul [<<Const7>>,<<Const3>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const21:d\d+>> DoubleConstant 21
   /// CHECK-DAG:                      Return [<<Const21>>]
 
-  /// CHECK-START: double Main.DoubleMultiplication() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleMultiplication() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static double DoubleMultiplication() {
@@ -509,18 +509,18 @@
    * Exercise constant folding on division.
    */
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
@@ -532,18 +532,18 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Div:j\d+>>      Div [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Div
 
@@ -555,17 +555,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
   /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
   /// CHECK-DAG:     <<Div:f\d+>>      Div [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3P2:f\d+>> FloatConstant 3.2
   /// CHECK-DAG:                       Return [<<Const3P2>>]
 
-  /// CHECK-START: float Main.FloatDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Div
 
   public static float FloatDivision() {
@@ -576,17 +576,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
   /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
   /// CHECK-DAG:     <<Div:d\d+>>      Div [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const3P2:d\d+>> DoubleConstant 3.2
   /// CHECK-DAG:                       Return [<<Const3P2>>]
 
-  /// CHECK-START: double Main.DoubleDivision() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleDivision() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Div
 
   public static double DoubleDivision() {
@@ -602,18 +602,18 @@
    * Exercise constant folding on remainder.
    */
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:i\d+>>   IntConstant 8
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<Div0Chk:i\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:i\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: int Main.IntRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.IntRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
@@ -625,18 +625,18 @@
     return c;
   }
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:j\d+>>   LongConstant 8
   /// CHECK-DAG:     <<Const3:j\d+>>   LongConstant 3
   /// CHECK-DAG:     <<Div0Chk:j\d+>>  DivZeroCheck [<<Const3>>]
   /// CHECK-DAG:     <<Rem:j\d+>>      Rem [<<Const8>>,<<Div0Chk>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.LongRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.LongRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       DivZeroCheck
   /// CHECK-NOT:                       Rem
 
@@ -648,17 +648,17 @@
     return c;
   }
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:f\d+>>   FloatConstant 8
   /// CHECK-DAG:     <<Const2P5:f\d+>> FloatConstant 2.5
   /// CHECK-DAG:     <<Rem:f\d+>>      Rem [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0P5:f\d+>> FloatConstant 0.5
   /// CHECK-DAG:                       Return [<<Const0P5>>]
 
-  /// CHECK-START: float Main.FloatRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.FloatRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Rem
 
   public static float FloatRemainder() {
@@ -669,17 +669,17 @@
     return c;
   }
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const8:d\d+>>   DoubleConstant 8
   /// CHECK-DAG:     <<Const2P5:d\d+>> DoubleConstant 2.5
   /// CHECK-DAG:     <<Rem:d\d+>>      Rem [<<Const8>>,<<Const2P5>>]
   /// CHECK-DAG:                       Return [<<Rem>>]
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0P5:d\d+>> DoubleConstant 0.5
   /// CHECK-DAG:                       Return [<<Const0P5>>]
 
-  /// CHECK-START: double Main.DoubleRemainder() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.DoubleRemainder() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Rem
 
   public static double DoubleRemainder() {
@@ -695,18 +695,18 @@
    * Exercise constant folding on left shift.
    */
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Const1>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const4:i\d+>>   IntConstant 4
   /// CHECK-DAG:                       Return [<<Const4>>]
 
-  /// CHECK-START: int Main.ShlIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShlIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static int ShlIntLong() {
@@ -715,17 +715,17 @@
     return lhs << rhs;
   }
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shl:j\d+>>      Shl [<<Const3L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const12L:j\d+>> LongConstant 12
   /// CHECK-DAG:                       Return [<<Const12L>>]
 
-  /// CHECK-START: long Main.ShlLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShlLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shl
 
   public static long ShlLongInt() {
@@ -739,18 +739,18 @@
    * Exercise constant folding on right shift.
    */
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Const7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:                       Return [<<Const1>>]
 
-  /// CHECK-START: int Main.ShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static int ShrIntLong() {
@@ -759,17 +759,17 @@
     return lhs >> rhs;
   }
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const9L:j\d+>>  LongConstant 9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Const9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2L>>]
 
-  /// CHECK-START: long Main.ShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Shr
 
   public static long ShrLongInt() {
@@ -783,18 +783,18 @@
    * Exercise constant folding on unsigned right shift.
    */
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM7:i\d+>>  IntConstant -7
   /// CHECK-DAG:     <<Const2L:j\d+>>  LongConstant 2
   /// CHECK-DAG:     <<TypeConv:i\d+>> TypeConversion [<<Const2L>>]
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<ConstM7>>,<<TypeConv>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:i\d+>> IntConstant 1073741822
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: int Main.UShrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.UShrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static int UShrIntLong() {
@@ -803,17 +803,17 @@
     return lhs >>> rhs;
   }
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstM9L:j\d+>> LongConstant -9
   /// CHECK-DAG:     <<Const2:i\d+>>   IntConstant 2
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<ConstM9L>>,<<Const2>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstRes:j\d+>> LongConstant 4611686018427387901
   /// CHECK-DAG:                       Return [<<ConstRes>>]
 
-  /// CHECK-START: long Main.UShrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.UShrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       UShr
 
   public static long UShrLongInt() {
@@ -827,18 +827,18 @@
    * Exercise constant folding on logical and.
    */
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndIntLong() {
@@ -847,18 +847,18 @@
     return lhs & rhs;
   }
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const2:j\d+>>   LongConstant 2
   /// CHECK-DAG:                       Return [<<Const2>>]
 
-  /// CHECK-START: long Main.AndLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.AndLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       And
 
   public static long AndLongInt() {
@@ -872,18 +872,18 @@
    * Exercise constant folding on logical or.
    */
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrIntLong() {
@@ -892,18 +892,18 @@
     return lhs | rhs;
   }
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const11:j\d+>>  LongConstant 11
   /// CHECK-DAG:                       Return [<<Const11>>]
 
-  /// CHECK-START: long Main.OrLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.OrLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Or
 
   public static long OrLongInt() {
@@ -917,18 +917,18 @@
    * Exercise constant folding on logical exclusive or.
    */
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10:i\d+>>  IntConstant 10
   /// CHECK-DAG:     <<Const3L:j\d+>>  LongConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const10>>]
   /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const3L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorIntLong() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorIntLong() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorIntLong() {
@@ -937,18 +937,18 @@
     return lhs ^ rhs;
   }
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
   /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const9:j\d+>>   LongConstant 9
   /// CHECK-DAG:                       Return [<<Const9>>]
 
-  /// CHECK-START: long Main.XorLongInt() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.XorLongInt() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       Xor
 
   public static long XorLongInt() {
@@ -962,17 +962,17 @@
    * Exercise constant folding on constant (static) condition.
    */
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Cond:z\d+>>    GreaterThanOrEqual [<<Const7>>,<<Const2>>]
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const1>>]
 
-  /// CHECK-START: int Main.StaticCondition() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticCondition() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      GreaterThanOrEqual
 
   public static int StaticCondition() {
@@ -991,16 +991,16 @@
    * Exercise constant folding on constant (static) condition for null references.
    */
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Null:l\d+>>    NullConstant
   /// CHECK-DAG:     <<Cond:z\d+>>    NotEqual [<<Null>>,<<Null>>]
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>  IntConstant 0
   /// CHECK-DAG:                      Select [{{i\d+}},{{i\d+}},<<Const0>>]
 
-  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding$after_inlining (after)
   /// CHECK-NOT:                      NotEqual
 
   private static Object getNull() {
@@ -1023,7 +1023,7 @@
    * (forward) post-order traversal of the dominator tree.
    */
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
@@ -1032,14 +1032,14 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Sub>>,<<Add>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Cond:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:     <<Phi:i\d+>>     Select [<<Const3>>,<<Const7>>,<<Cond>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding$after_inlining (after)
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Sub
 
@@ -1325,16 +1325,16 @@
    * Exercise constant folding on type conversions.
    */
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: int Main.ReturnInt33() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt33() {
@@ -1342,16 +1342,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstMax:f\d+>> FloatConstant 1e+34
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstMax>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<ConstMax:i\d+>> IntConstant 2147483647
   /// CHECK-DAG:                       Return [<<ConstMax>>]
 
-  /// CHECK-START: int Main.ReturnIntMax() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnIntMax() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnIntMax() {
@@ -1359,16 +1359,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (before)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: int Main.ReturnInt0() constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.ReturnInt0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static int ReturnInt0() {
@@ -1376,16 +1376,16 @@
     return (int) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: long Main.ReturnLong33() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong33() {
@@ -1393,16 +1393,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: long Main.ReturnLong34() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong34() {
@@ -1410,16 +1410,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (before)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
   /// CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<ConstNaN>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:                       Return [<<Const0>>]
 
-  /// CHECK-START: long Main.ReturnLong0() constant_folding_after_inlining (after)
+  /// CHECK-START: long Main.ReturnLong0() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static long ReturnLong0() {
@@ -1427,16 +1427,16 @@
     return (long) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
-  /// CHECK-START: float Main.ReturnFloat33() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat33() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat33() {
@@ -1444,16 +1444,16 @@
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: float Main.ReturnFloat34() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat34() {
@@ -1461,16 +1461,16 @@
     return (float) imm;
   }
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (before)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: float Main.ReturnFloat99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static float ReturnFloat99P25() {
@@ -1478,12 +1478,12 @@
     return (float) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const33>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble33() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble33() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const33:d\d+>>  DoubleConstant 33
   /// CHECK-DAG:                       Return [<<Const33>>]
 
@@ -1492,16 +1492,16 @@
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const34>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const34:d\d+>>  DoubleConstant 34
   /// CHECK-DAG:                       Return [<<Const34>>]
 
-  /// CHECK-START: double Main.ReturnDouble34() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble34() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble34() {
@@ -1509,16 +1509,16 @@
     return (double) imm;
   }
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (before)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (before)
   /// CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
   /// CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const>>]
   /// CHECK-DAG:                       Return [<<Convert>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
   /// CHECK-DAG:                       Return [<<Const>>]
 
-  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding_after_inlining (after)
+  /// CHECK-START: double Main.ReturnDouble99P25() constant_folding$after_inlining (after)
   /// CHECK-NOT:                       TypeConversion
 
   public static double ReturnDouble99P25() {
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 66e1d92..3a56c3b 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -927,6 +927,32 @@
     }
   }
 
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void nonzeroLength(int[] a) {
+    if (a.length != 0) {
+      a[0] = 112;
+    }
+  }
+
+  /// CHECK-START: void Main.knownLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.knownLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void knownLength(int[] a) {
+    if (a.length == 2) {
+      a[0] = -1;
+      a[1] = -2;
+    }
+  }
+
   static int[][] mA;
 
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
@@ -1178,9 +1204,6 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
@@ -1191,7 +1214,7 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // Several HDeoptimize will be added. Two for each index.
+    // Three HDeoptimize will be added for the bounds.
     // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
@@ -1240,20 +1263,12 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
   /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -1365,15 +1380,15 @@
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
-  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier$after_bce (after)
   //  Simplification removes the redundant check
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
 
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. Two for the index
-    // and one for null check on array.
+    // Three HDeoptimize will be added: two for the index and one for the null check
+    // on the array. Then simplification removes one redundant HDeoptimize.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -1586,6 +1601,26 @@
       }
     }
 
+    nonzeroLength(array);
+    if (array[0] != 112) {
+      System.out.println("nonzero length failed!");
+    }
+
+    knownLength(array);
+    if (array[0] != 112 || array[1] != 1) {
+      System.out.println("nonzero length failed!");
+    }
+    array = new int[2];
+    knownLength(array);
+    if (array[0] != -1 || array[1] != -2) {
+      System.out.println("nonzero length failed!");
+    }
+
+    // Zero length array does not break.
+    array = new int[0];
+    nonzeroLength(array);
+    knownLength(array);
+
     mA = new int[4][4];
     for (int i = 0; i < 4; i++) {
       for (int j = 0; j < 4; j++) {
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 08b6cec..6e453af 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -30,6 +30,11 @@
   public void $noinline$f() {
     throw new RuntimeException();
   }
+
+  public int $inline$h(boolean cond) {
+    Super obj = (cond ? this : null);
+    return obj.hashCode();
+  }
 }
 
 class SubclassA extends Super {
@@ -98,7 +103,7 @@
   /// CHECK-NOT:     CheckCast
   public String testClassRemove() {
     Object s = SubclassA.class;
-    return ((Class)s).getName();
+    return ((Class<?>)s).getName();
   }
 
   /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (before)
@@ -209,11 +214,11 @@
   /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
   /// CHECK-DAG:                   If [<<IOf>>]
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (before)
   /// CHECK:         CheckCast
   /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf_Inlined(Object o) {
     if (!$inline$InstanceofSubclassC(o)) {
@@ -620,6 +625,46 @@
     o.mainField = 0;
   }
 
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:                    InvokeVirtual [<<Arg>>,{{z\d+}}] method_name:Super.$inline$h
+
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:     <<Null:l\d+>>  NullConstant
+  /// CHECK-DAG:     <<Phi:l\d+>>   Phi [<<Arg>>,<<Null>>] klass:SubclassA
+  /// CHECK-DAG:     <<NCPhi:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK-DAG:                    InvokeVirtual [<<NCPhi>>] method_name:Super.hashCode
+
+  public void testThisArgumentMoreSpecific(boolean cond) {
+    // Inlining the method from Super will build it with `this` typed as Super.
+    // Running RTP will sharpen it to SubclassA.
+    SubclassA obj = new SubclassA();
+    ((Super) obj).$inline$h(cond);
+  }
+
+  public static int $inline$hashCode(Super obj) {
+    return obj.hashCode();
+  }
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                    InvokeStaticOrDirect [<<Arg>>{{(,[ij]\d+)?}}] method_name:Main.$inline$hashCode
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  /// CHECK-DAG:     <<NCArg:l\d+>> NullCheck [<<Arg>>] klass:SubclassA
+  /// CHECK-DAG:                    InvokeVirtual [<<NCArg>>] method_name:Super.hashCode
+
+  public void testExplicitArgumentMoreSpecific(SubclassA obj) {
+    // Inlining a method will build it with reference types from its signature;
+    // here the callee graph is built with Super as the type of its only argument.
+    // Running RTP after its ParameterValue instructions are replaced with actual
+    // arguments will type the inner graph more precisely.
+    $inline$hashCode(obj);
+  }
+
   /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (before)
   /// CHECK:      <<Int:i\d+>>       IntConstant 0
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main exact:false
diff --git a/test/458-checker-instruction-simplification/smali/SmaliTests.smali b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
index ede599b..6845961 100644
--- a/test/458-checker-instruction-simplification/smali/SmaliTests.smali
+++ b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
@@ -191,3 +191,139 @@
 
 .end method
 
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
+## CHECK-DAG:     <<Const8:i\d+>>    IntConstant 8
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const7>>]
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Add>>,<<Const8>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static AddSubConst(I)I
+    .registers 3
+
+    .prologue
+    add-int/lit8 v0, p0, 7
+
+    const/16 v1, 8
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const3:i\d+>>    IntConstant 3
+## CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ArgValue>>,<<Const3>>]
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const4>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubAddConst(I)I
+    .registers 2
+
+    .prologue
+    const/4 v0, 3
+
+    sub-int v0, p0, v0
+
+    add-int/lit8 v0, v0, 4
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const9:i\d+>>    IntConstant 9
+## CHECK-DAG:     <<Const10:i\d+>>   IntConstant 10
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const9>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const10>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM19:i\d+>>  IntConstant -19
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM19>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubSubConst1(I)I
+    .registers 3
+
+    .prologue
+    const/16 v1, 9
+
+    sub-int v0, p0, v1
+
+    const/16 v1, 10
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+## CHECK-DAG:     <<Const12:i\d+>>   IntConstant 12
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<Const11>>,<<ArgValue>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const12>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ConstM1>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst2(I)I
+    .registers 3
+
+    .prologue
+    rsub-int/lit8 v0, p0, 11
+
+    const/16 v1, 12
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const15:i\d+>>   IntConstant 15
+## CHECK-DAG:     <<Const16:i\d+>>   IntConstant 16
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const16>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Const15>>,<<Sub1>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const31:i\d+>>   IntConstant 31
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const31>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst3(I)I
+    .registers 2
+
+    .prologue
+    const/16 v0, 16
+
+    sub-int v0, p0, v0
+
+    rsub-int/lit8 v0, v0, 15
+
+    return v0
+.end method
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 53c2e0b..5b14735 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -18,6 +18,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   public static void assertBooleanEquals(boolean expected, boolean result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -58,41 +60,66 @@
    * Tiny programs exercising optimizations of arithmetic identities.
    */
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>  LongConstant 0
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Add
 
-  public static long Add0(long arg) {
+  public static long $noinline$Add0(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 + arg;
   }
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>    IntConstant 2
+  /// CHECK-DAG:     <<ConstM3:i\d+>>   IntConstant -3
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add1:i\d+>>      Add [<<ArgValue>>,<<Const1>>]
+  /// CHECK-DAG:     <<Add2:i\d+>>      Add [<<Add1>>,<<Const2>>]
+  /// CHECK-DAG:     <<Add3:i\d+>>      Add [<<Add2>>,<<ConstM3>>]
+  /// CHECK-DAG:     <<Add4:i\d+>>      Add [<<Add3>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add4>>]
+
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$AddAddSubAddConst(int arg) {
+    if (doThrow) { throw new Error(); }
+    return arg + 1 + 2 - 3 + 4;
+  }
+
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<And:i\d+>>     And [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                      Return [<<And>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                      And
 
-  public static int AndAllOnes(int arg) {
+  public static int $noinline$AndAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg & -1;
   }
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const15:i\d+>>  IntConstant 15
@@ -100,20 +127,21 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static int UShr28And15(int arg) {
+  public static int $noinline$UShr28And15(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 15;
   }
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const15:j\d+>>  LongConstant 15
@@ -121,20 +149,21 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static long UShr60And15(long arg) {
+  public static long $noinline$UShr60And15(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 15;
   }
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -142,7 +171,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -150,11 +179,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int UShr28And7(int arg) {
+  public static int $noinline$UShr28And7(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 7;
   }
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -162,7 +192,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -170,11 +200,12 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long UShr60And7(long arg) {
+  public static long $noinline$UShr60And7(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 7;
   }
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const255:i\d+>> IntConstant 255
@@ -182,21 +213,22 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const24>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static int Shr24And255(int arg) {
+  public static int $noinline$Shr24And255(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 255;
   }
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const255:j\d+>> LongConstant 255
@@ -204,21 +236,22 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const56>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static long Shr56And255(long arg) {
+  public static long $noinline$Shr56And255(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 255;
   }
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -226,7 +259,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -234,11 +267,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int Shr24And127(int arg) {
+  public static int $noinline$Shr24And127(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 127;
   }
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -246,7 +280,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -254,267 +288,361 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long Shr56And127(long arg) {
+  public static long $noinline$Shr56And127(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 127;
   }
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Div:j\d+>>     Div [<<Arg>>,<<Const1>>]
   /// CHECK-DAG:                      Return [<<Div>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-NOT:                      Div
 
-  public static long Div1(long arg) {
+  public static long $noinline$Div1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 1;
   }
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Div
 
-  public static int DivN1(int arg) {
+  public static int $noinline$DivN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -1;
   }
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static long Mul1(long arg) {
+  public static long $noinline$Mul1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 1;
   }
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Mul:i\d+>>      Mul [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static int MulN1(int arg) {
+  public static int $noinline$MulN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * -1;
   }
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
   /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
   /// CHECK-DAG:     <<Shl:j\d+>>       Shl [<<Arg>>,<<Const7>>]
   /// CHECK-DAG:                        Return [<<Shl>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Mul
 
-  public static long MulPowerOfTwo128(long arg) {
+  public static long $noinline$MulPowerOfTwo128(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 128;
   }
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const10:j\d+>>   LongConstant 10
+  /// CHECK-DAG:     <<Const11:j\d+>>   LongConstant 11
+  /// CHECK-DAG:     <<Const12:j\d+>>   LongConstant 12
+  /// CHECK-DAG:     <<Mul1:j\d+>>      Mul [<<Const10>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Mul2:j\d+>>      Mul [<<Mul1>>,<<Const11>>]
+  /// CHECK-DAG:     <<Mul3:j\d+>>      Mul [<<Mul2>>,<<Const12>>]
+  /// CHECK-DAG:                        Return [<<Mul3>>]
+
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const1320:j\d+>>  LongConstant 1320
+  /// CHECK-DAG:     <<Mul:j\d+>>        Mul [<<ArgValue>>,<<Const1320>>]
+  /// CHECK-DAG:                         Return [<<Mul>>]
+
+  public static long $noinline$MulMulMulConst(long arg) {
+    if (doThrow) { throw new Error(); }
+    return 10 * arg * 11 * 12;
+  }
+
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Or
 
-  public static int Or0(int arg) {
+  public static int $noinline$Or0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg | 0;
   }
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Or:j\d+>>        Or [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
-  public static long OrSame(long arg) {
+  public static long $noinline$OrSame(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg | arg;
   }
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shl
 
-  public static int Shl0(int arg) {
+  public static int $noinline$Shl0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg << 0;
   }
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr0(long arg) {
+  public static long $noinline$Shr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 0;
   }
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const64:i\d+>>  IntConstant 64
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const64>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr64(long arg) {
+  public static long $noinline$Shr64(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 64;
   }
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long Sub0(long arg) {
+  public static long $noinline$Sub0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg - 0;
   }
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubAliasNeg(int arg) {
+  public static int $noinline$SubAliasNeg(int arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - arg;
   }
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const5:i\d+>>    IntConstant 5
+  /// CHECK-DAG:     <<Const6:i\d+>>    IntConstant 6
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const5>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const6>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const11>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst1(int arg) {
+    if (doThrow) { throw new Error(); }
+    return 5 - arg + 6;
+  }
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const14:i\d+>>   IntConstant 14
+  /// CHECK-DAG:     <<Const13:i\d+>>   IntConstant 13
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const13>>]
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const14>>,<<Add>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const1>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst2(int arg) {
+    if (doThrow) { throw new Error(); }
+    return 14 - (arg + 13);
+  }
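   // Worked example of the two folds above, which are plain constant re-association
   // (exact in wrapping int arithmetic):
   //   5 - arg + 6     ==  (5 + 6) - arg   ==  11 - arg
   //   14 - (arg + 13) == (14 - 13) - arg  ==   1 - arg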
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const17:j\d+>>   LongConstant 17
+  /// CHECK-DAG:     <<Const18:j\d+>>   LongConstant 18
+  /// CHECK-DAG:     <<Sub1:j\d+>>      Sub [<<Const18>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Sub2:j\d+>>      Sub [<<Const17>>,<<Sub1>>]
+  /// CHECK-DAG:                        Return [<<Sub2>>]
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-DAG:     <<Add:j\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static long $noinline$SubSubConst(long arg) {
+    if (doThrow) { throw new Error(); }
+    return 17 - (18 - arg);
+  }
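   // Worked example of the fold above: 17 - (18 - arg) == arg + (17 - 18) == arg + (-1),
   // matching the `Add [<<ArgValue>>,<<ConstM1>>]` expected after simplification;
   // e.g. arg = 100L gives 17 - (18 - 100) = 99 = 100 + (-1).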
+
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       UShr
 
-  public static long UShr0(long arg) {
+  public static long $noinline$UShr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >>> 0;
   }
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int Xor0(int arg) {
+  public static int $noinline$Xor0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ 0;
   }
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Not>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int XorAllOnes(int arg) {
+  public static int $noinline$XorAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ -1;
   }
 
@@ -525,7 +653,7 @@
    * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
    */
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -533,7 +661,7 @@
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-NOT:                       Neg
@@ -541,7 +669,8 @@
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  public static int AddNegs1(int arg1, int arg2) {
+  public static int $noinline$AddNegs1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + -arg2;
   }
 
@@ -556,7 +685,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -566,7 +695,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -577,7 +706,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) GVN (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -586,7 +715,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add>>,<<Add>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int AddNegs2(int arg1, int arg2) {
+  public static int $noinline$AddNegs2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp1 = -arg1;
     int temp2 = -arg2;
     return (temp1 + temp2) | (temp1 + temp2);
@@ -600,7 +730,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -612,7 +742,7 @@
   /// CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -625,7 +755,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long AddNegs3(long arg1, long arg2) {
+  public static long $noinline$AddNegs3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long n_arg1 = -arg1;
     long n_arg2 = -arg2;
@@ -641,24 +772,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Add:j\d+>>      Add [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  public static long AddNeg1(long arg1, long arg2) {
+  public static long $noinline$AddNeg1(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + arg2;
   }
 
@@ -671,7 +803,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -680,7 +812,7 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -689,10 +821,11 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long AddNeg2(long arg1, long arg2) {
+  public static long $noinline$AddNeg2(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long temp = -arg2;
     return (arg1 + temp) | (arg1 + temp);
   }
@@ -702,20 +835,21 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Neg2>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static long NegNeg1(long arg) {
+  public static long $noinline$NegNeg1(long arg) {
+    if (doThrow) { throw new Error(); }
     return -(-arg);
   }
 
@@ -726,29 +860,30 @@
    * and in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  /// CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding$after_inlining (after)
   /// CHECK:         <<Const0:i\d+>>   IntConstant 0
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
   /// CHECK:                           Return [<<Const0>>]
 
-  public static int NegNeg2(int arg) {
+  public static int $noinline$NegNeg2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg;
     return temp + -temp;
   }
@@ -760,22 +895,23 @@
    * and in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Const0>>,<<Neg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Sub
 
-  public static long NegNeg3(long arg) {
+  public static long $noinline$NegNeg3(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - -arg;
   }
 
@@ -785,23 +921,24 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Sub>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static int NegSub1(int arg1, int arg2) {
+  public static int $noinline$NegSub1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -(arg1 - arg2);
   }
 
@@ -815,7 +952,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -824,7 +961,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -833,7 +970,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int NegSub2(int arg1, int arg2) {
+  public static int $noinline$NegSub2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = arg1 - arg2;
     return -temp | -temp;
   }
@@ -843,41 +981,43 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
    */
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:j\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:j\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:                       Return [<<Not2>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Not
 
-  public static long NotNot1(long arg) {
+  public static long $noinline$NotNot1(long arg) {
+    if (doThrow) { throw new Error(); }
     return ~~arg;
   }
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:i\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:i\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not2>>,<<Not1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK:                           Not
   /// CHECK-NOT:                       Not
 
-  public static int NotNot2(int arg) {
+  public static int $noinline$NotNot2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = ~arg;
     return temp + ~temp;
   }
@@ -887,24 +1027,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubNeg1(int arg1, int arg2) {
+  public static int $noinline$SubNeg1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 - arg2;
   }
 
@@ -918,7 +1059,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -927,7 +1068,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -936,10 +1077,11 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Add
 
-  public static int SubNeg2(int arg1, int arg2) {
+  public static int $noinline$SubNeg2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg1;
     return (temp - arg2) | (temp - arg2);
   }
@@ -951,7 +1093,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -962,7 +1104,7 @@
   /// CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -974,7 +1116,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long SubNeg3(long arg1, long arg2) {
+  public static long $noinline$SubNeg3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long temp = -arg1;
     for (long i = 0; i < 1; i++) {
@@ -983,7 +1126,7 @@
     return res;
   }
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -993,15 +1136,22 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
-  public static boolean EqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) != 2;
+  public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
+    // Make calls that will be inlined to make sure the instruction simplifier
+    // sees the simplification (dead code elimination will also try to simplify it).
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) != 2;
   }
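   // The ternary above can only produce 0 or 1, so comparing it against 2 has a fixed
   // outcome: `!= 2` is always true (folded to IntConstant 1, as checked above) and the
   // `== 2` variant in the next test is always false (folded to IntConstant 0).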
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  public static int $inline$ReturnArg(int arg) {
+    return arg;
+  }
+
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1011,12 +1161,15 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
   /// CHECK-DAG:                       Return [<<False>>]
 
-  public static boolean NotEqualBoolVsIntConst(boolean arg) {
-    return (arg ? 0 : 1) == 2;
+  public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
+    // Make calls that will be inlined to make sure the instruction simplifier
+    // sees the simplification (dead code elimination will also try to simplify it).
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) == 2;
   }
 
   /*
@@ -1025,7 +1178,7 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
@@ -1033,7 +1186,7 @@
   /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
@@ -1041,81 +1194,86 @@
     return !arg;
   }
 
-  public static boolean NotNotBool(boolean arg) {
+  public static boolean $noinline$NotNotBool(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return !(NegateValue(arg));
   }
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:f\d+>>   FloatConstant 2
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:f\d+>>  FloatConstant 0.5
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float Div2(float arg) {
+  public static float $noinline$Div2(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0f;
   }
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:d\d+>>   DoubleConstant 2
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:d\d+>>  DoubleConstant 0.5
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double Div2(double arg) {
+  public static double $noinline$Div2(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0;
   }
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:f\d+>>   FloatConstant -0.25
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:f\d+>>  FloatConstant -4
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float DivMP25(float arg) {
+  public static float $noinline$DivMP25(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:d\d+>>   DoubleConstant -0.25
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:d\d+>>  DoubleConstant -4
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double DivMP25(double arg) {
+  public static double $noinline$DivMP25(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
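   // Both DivMP25 tests rely on -0.25 being a power of two: its reciprocal, -4, is
   // exactly representable, so `arg / -0.25` and `arg * -4` give the same result and
   // the Div can be rewritten as a Mul. A divisor such as 3, whose reciprocal is not
   // exactly representable, would not be simplified this way.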
 
@@ -1123,18 +1281,19 @@
    * Test strength reduction of factors of the form (2^n + 1).
    */
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
   /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
   /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
   /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
 
-  public static int mulPow2Plus1(int arg) {
+  public static int $noinline$mulPow2Plus1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 9;
   }
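   // Worked example of the (2^n + 1) reduction: 9 == 8 + 1, so arg * 9 == (arg << 3) + arg;
   // e.g. arg = 7 gives 7 * 9 = 63 = (7 << 3) + 7 = 56 + 7.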
 
@@ -1142,62 +1301,69 @@
    * Test strength reduction of factors of the form (2^n - 1).
    */
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
   /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
   /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
   /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
 
-  public static long mulPow2Minus1(long arg) {
+  public static long $noinline$mulPow2Minus1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 31;
   }
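   // Worked example of the (2^n - 1) reduction: 31 == 32 - 1, so arg * 31 == (arg << 5) - arg;
   // e.g. arg = 3L gives 3 * 31 = 93 = (3 << 5) - 3 = 96 - 3.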
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldNotEqualOne() {
+  public static int $noinline$booleanFieldNotEqualOne() {
+    if (doThrow) { throw new Error(); }
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<Field>>,<<Const0>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldEqualZero() {
+  public static int $noinline$booleanFieldEqualZero() {
+    if (doThrow) { throw new Error(); }
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1210,7 +1376,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1221,11 +1387,12 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionNotEqualOne(int i) {
+  public static int $noinline$intConditionNotEqualOne(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1238,7 +1405,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1249,16 +1416,17 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionEqualZero(int i) {
+  public static int $noinline$intConditionEqualZero(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) != $inline$false()) ? 13 : 54;
   }
 
   // Test that conditions on float/double are not flipped.
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) builder (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier$before_codegen (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1267,14 +1435,15 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int floatConditionNotEqualOne(float f) {
+  public static int $noinline$floatConditionNotEqualOne(float f) {
+    if (doThrow) { throw new Error(); }
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) builder (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier$before_codegen (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1283,42 +1452,45 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int doubleConditionEqualZero(double d) {
+  public static int $noinline$doubleConditionEqualZero(double d) {
+    if (doThrow) { throw new Error(); }
     return ((d > 42.0) != false) ? 13 : 54;
   }
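   // The float/double tests above exist because of NaN: for f = Float.NaN both
   // (f > 42.0f) and (f <= 42.0f) are false, so a comparison and its "flipped"
   // counterpart are not complements and the simplifier must not reverse the
   // condition feeding the Select.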
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int intToDoubleToInt(int value) {
+  public static int $noinline$intToDoubleToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      {{d\d+}}          TypeConversion [<<Arg>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static String intToDoubleToIntPrint(int value) {
+  public static String $noinline$intToDoubleToIntPrint(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back
     // with another use of the intermediate result.
     double d = (double) value;
@@ -1326,55 +1498,58 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int byteToDoubleToInt(byte value) {
+  public static int $noinline$byteToDoubleToInt(byte value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion, use implicit conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static int floatToDoubleToInt(float value) {
+  public static int $noinline$floatToDoubleToInt(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  public static String floatToDoubleToIntPrint(float value) {
+  public static String $noinline$floatToDoubleToIntPrint(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion with
     // an extra use of the intermediate result.
     double d = (double) value;
@@ -1382,176 +1557,186 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static short byteToDoubleToShort(byte value) {
+  public static short $noinline$byteToDoubleToShort(byte value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is byte->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to byte->int which we omit as it's an implicit
     // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static short charToDoubleToShort(char value) {
+  public static short $noinline$charToDoubleToShort(char value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is char->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to char->int which we omit as it's an implicit
     // conversion. Then we are left with the resulting char->short conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short floatToIntToShort(float value) {
+  public static short $noinline$floatToIntToShort(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossy FP to integral conversion followed by another conversion: no simplification.
     return (short) value;
   }
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  public static int intToFloatToInt(int value) {
+  public static int $noinline$intToFloatToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossy integral to FP conversion followed by another conversion: no simplification.
     return (int) (float) value;
   }
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double longToIntToDouble(long value) {
+  public static double $noinline$longToIntToDouble(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed by an integral to FP conversion: no simplification.
     return (double) (int) value;
   }
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  public static long longToIntToLong(long value) {
+  public static long $noinline$longToIntToLong(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed by an int-to-long conversion: no simplification.
     return (long) (int) value;
   }
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static short shortToCharToShort(short value) {
+  public static short $noinline$shortToCharToShort(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion to original type.
     return (short) (char) value;
   }
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Long>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static int shortToLongToInt(short value) {
+  public static int $noinline$shortToLongToInt(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by a non-widening integral conversion; use the implicit conversion.
     return (int) (long) value;
   }
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  public static byte shortToCharToByte(short value) {
+  public static byte $noinline$shortToCharToByte(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type. Simplify to use only one conversion.
     return (byte) (char) value;
   }
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  public static String shortToCharToBytePrint(short value) {
+  public static String $noinline$shortToCharToBytePrint(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type with an extra use of the intermediate result.
     char c = (char) value;
@@ -1559,7 +1744,7 @@
     return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
   }
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
   /// CHECK-DAG:      <<And:j\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1567,58 +1752,61 @@
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static byte longAnd0xffToByte(long value) {
+  public static byte $noinline$longAnd0xffToByte(long value) {
+    if (doThrow) { throw new Error(); }
     return (byte) (value & 0xff);
   }
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (before)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 131071
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static char intAnd0x1ffffToChar(int value) {
+  public static char $noinline$intAnd0x1ffffToChar(int value) {
+    if (doThrow) { throw new Error(); }
     // Keeping all significant bits and one more.
     return (char) (value & 0x1ffff);
   }
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short intAnd0x17fffToShort(int value) {
+  public static short $noinline$intAnd0x17fffToShort(int value) {
+    if (doThrow) { throw new Error(); }
     // No simplification: clearing a significant bit.
     return (short) (value & 0x17fff);
   }
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 65535
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1626,203 +1814,509 @@
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Same>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double shortAnd0xffffToShortToDouble(short value) {
+  public static double $noinline$shortAnd0xffffToShortToDouble(short value) {
+    if (doThrow) { throw new Error(); }
     short same = (short) (value & 0xffff);
     return (double) same;
   }
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
 
-  public static int intReverseCondition(int i) {
+  public static int $noinline$intReverseCondition(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 > i) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
 
-  public static int intReverseConditionNaN(int i) {
+  public static int $noinline$intReverseConditionNaN(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 != Math.sqrt(i)) ? 13 : 54;
   }
 
-  public static int runSmaliTest(String name, boolean input) {
+  public static int $noinline$runSmaliTest(String name, boolean input) {
+    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
-      Method m = c.getMethod(name, new Class[] { boolean.class });
+      Method m = c.getMethod(name, boolean.class);
       return (Integer) m.invoke(null, input);
     } catch (Exception ex) {
       throw new Error(ex);
     }
   }
 
-public static void main(String[] args) {
-    int arg = 123456;
+  public static int $noinline$runSmaliTestConst(String name, int arg) {
+    if (doThrow) { throw new Error(); }
+    try {
+      Class<?> c = Class.forName("SmaliTests");
+      Method m = c.getMethod(name, int.class);
+      return (Integer) m.invoke(null, arg);
+    } catch (Exception ex) {
+      throw new Error(ex);
+    }
+  }
 
-    assertLongEquals(Add0(arg), arg);
-    assertIntEquals(AndAllOnes(arg), arg);
-    assertLongEquals(Div1(arg), arg);
-    assertIntEquals(DivN1(arg), -arg);
-    assertLongEquals(Mul1(arg), arg);
-    assertIntEquals(MulN1(arg), -arg);
-    assertLongEquals(MulPowerOfTwo128(arg), (128 * arg));
-    assertIntEquals(Or0(arg), arg);
-    assertLongEquals(OrSame(arg), arg);
-    assertIntEquals(Shl0(arg), arg);
-    assertLongEquals(Shr0(arg), arg);
-    assertLongEquals(Shr64(arg), arg);
-    assertLongEquals(Sub0(arg), arg);
-    assertIntEquals(SubAliasNeg(arg), -arg);
-    assertLongEquals(UShr0(arg), arg);
-    assertIntEquals(Xor0(arg), arg);
-    assertIntEquals(XorAllOnes(arg), ~arg);
-    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
-    assertLongEquals(AddNeg1(arg, arg + 1), 1);
-    assertLongEquals(AddNeg2(arg, arg + 1), -1);
-    assertLongEquals(NegNeg1(arg), arg);
-    assertIntEquals(NegNeg2(arg), 0);
-    assertLongEquals(NegNeg3(arg), arg);
-    assertIntEquals(NegSub1(arg, arg + 1), 1);
-    assertIntEquals(NegSub2(arg, arg + 1), 1);
-    assertLongEquals(NotNot1(arg), arg);
-    assertIntEquals(NotNot2(arg), -1);
-    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotNotBool(true), true);
-    assertBooleanEquals(NotNotBool(false), false);
-    assertFloatEquals(Div2(100.0f), 50.0f);
-    assertDoubleEquals(Div2(150.0), 75.0);
-    assertFloatEquals(DivMP25(100.0f), -400.0f);
-    assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertIntEquals(UShr28And15(0xc1234567), 0xc);
-    assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
-    assertIntEquals(UShr28And7(0xc1234567), 0x4);
-    assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L);
-    assertIntEquals(Shr24And255(0xc1234567), 0xc1);
-    assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
-    assertIntEquals(Shr24And127(0xc1234567), 0x41);
-    assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
-    assertIntEquals(0, mulPow2Plus1(0));
-    assertIntEquals(9, mulPow2Plus1(1));
-    assertIntEquals(18, mulPow2Plus1(2));
-    assertIntEquals(900, mulPow2Plus1(100));
-    assertIntEquals(111105, mulPow2Plus1(12345));
-    assertLongEquals(0, mulPow2Minus1(0));
-    assertLongEquals(31, mulPow2Minus1(1));
-    assertLongEquals(62, mulPow2Minus1(2));
-    assertLongEquals(3100, mulPow2Minus1(100));
-    assertLongEquals(382695, mulPow2Minus1(12345));
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static int $noinline$intUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
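+    // An int shift in Java already uses only the low five bits of the shift count,
+    // so the explicit '& 31' is redundant and the simplifier drops it (see CHECK above).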
+    return value << (shift & 31);
+  }
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const63:i\d+>>  IntConstant 63
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const63>>]
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  public static long $noinline$longUnnecessaryShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
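+    // A long shift uses only the low six bits of the shift count, so '& 63' is redundant.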
+    return value >> (shift & 63);
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const255:i\d+>> IntConstant 255
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const255>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  public static int $noinline$intUnnecessaryWiderShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
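+    // The 0xff mask preserves all five bits the int shift actually uses, so it is still removable.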
+    return value >>> (shift & 0xff);
+  }
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static long $noinline$longSmallerShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
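+    // Masking with 3 is narrower than the implicit six-bit masking of long shifts,
+    // so removing it would change the result: no simplification here.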
+    return value << (shift & 3);
+  }
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$otherUseOfUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    int temp = shift & 31;
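+    // 'temp' has a second use in the addition, so the And survives, but the shift itself
+    // is simplified to use the unmasked shift amount (see CHECK above).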
+    return (value >> temp) + temp;
+  }
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Y>>]
+
+  public static int $noinline$intAddSubSimplifyArg1(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intAddSubSimplifyArg2(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyLeft(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyRight(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return y + sub;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg1(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg2(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyLeft(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyRight(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return y + sub;
+  }
+
+ public static void main(String[] args) {
+    int arg = 123456;
+    float floatArg = 123456.125f;
+
+    assertLongEquals(arg, $noinline$Add0(arg));
+    assertIntEquals(5, $noinline$AddAddSubAddConst(1));
+    assertIntEquals(arg, $noinline$AndAllOnes(arg));
+    assertLongEquals(arg, $noinline$Div1(arg));
+    assertIntEquals(-arg, $noinline$DivN1(arg));
+    assertLongEquals(arg, $noinline$Mul1(arg));
+    assertIntEquals(-arg, $noinline$MulN1(arg));
+    assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg));
+    assertLongEquals(2640, $noinline$MulMulMulConst(2));
+    assertIntEquals(arg, $noinline$Or0(arg));
+    assertLongEquals(arg, $noinline$OrSame(arg));
+    assertIntEquals(arg, $noinline$Shl0(arg));
+    assertLongEquals(arg, $noinline$Shr0(arg));
+    assertLongEquals(arg, $noinline$Shr64(arg));
+    assertLongEquals(arg, $noinline$Sub0(arg));
+    assertIntEquals(-arg, $noinline$SubAliasNeg(arg));
+    assertIntEquals(9, $noinline$SubAddConst1(2));
+    assertIntEquals(-2, $noinline$SubAddConst2(3));
+    assertLongEquals(3, $noinline$SubSubConst(4));
+    assertLongEquals(arg, $noinline$UShr0(arg));
+    assertIntEquals(arg, $noinline$Xor0(arg));
+    assertIntEquals(~arg, $noinline$XorAllOnes(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$AddNegs3(arg, arg + 1));
+    assertLongEquals(1, $noinline$AddNeg1(arg, arg + 1));
+    assertLongEquals(-1, $noinline$AddNeg2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NegNeg1(arg));
+    assertIntEquals(0, $noinline$NegNeg2(arg));
+    assertLongEquals(arg, $noinline$NegNeg3(arg));
+    assertIntEquals(1, $noinline$NegSub1(arg, arg + 1));
+    assertIntEquals(1, $noinline$NegSub2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NotNot1(arg));
+    assertIntEquals(-1, $noinline$NotNot2(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$SubNeg3(arg, arg + 1));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(true, $noinline$NotNotBool(true));
+    assertBooleanEquals(false, $noinline$NotNotBool(false));
+    assertFloatEquals(50.0f, $noinline$Div2(100.0f));
+    assertDoubleEquals(75.0, $noinline$Div2(150.0));
+    assertFloatEquals(-400.0f, $noinline$DivMP25(100.0f));
+    assertDoubleEquals(-600.0, $noinline$DivMP25(150.0));
+    assertIntEquals(0xc, $noinline$UShr28And15(0xc1234567));
+    assertLongEquals(0xcL, $noinline$UShr60And15(0xc123456787654321L));
+    assertIntEquals(0x4, $noinline$UShr28And7(0xc1234567));
+    assertLongEquals(0x4L, $noinline$UShr60And7(0xc123456787654321L));
+    assertIntEquals(0xc1, $noinline$Shr24And255(0xc1234567));
+    assertLongEquals(0xc1L, $noinline$Shr56And255(0xc123456787654321L));
+    assertIntEquals(0x41, $noinline$Shr24And127(0xc1234567));
+    assertLongEquals(0x41L, $noinline$Shr56And127(0xc123456787654321L));
+    assertIntEquals(0, $noinline$mulPow2Plus1(0));
+    assertIntEquals(9, $noinline$mulPow2Plus1(1));
+    assertIntEquals(18, $noinline$mulPow2Plus1(2));
+    assertIntEquals(900, $noinline$mulPow2Plus1(100));
+    assertIntEquals(111105, $noinline$mulPow2Plus1(12345));
+    assertLongEquals(0, $noinline$mulPow2Minus1(0));
+    assertLongEquals(31, $noinline$mulPow2Minus1(1));
+    assertLongEquals(62, $noinline$mulPow2Minus1(2));
+    assertLongEquals(3100, $noinline$mulPow2Minus1(100));
+    assertLongEquals(382695, $noinline$mulPow2Minus1(12345));
 
     booleanField = false;
-    assertIntEquals(booleanFieldNotEqualOne(), 54);
-    assertIntEquals(booleanFieldEqualZero(), 54);
+    assertIntEquals($noinline$booleanFieldNotEqualOne(), 54);
+    assertIntEquals($noinline$booleanFieldEqualZero(), 54);
     booleanField = true;
-    assertIntEquals(booleanFieldNotEqualOne(), 13);
-    assertIntEquals(booleanFieldEqualZero(), 13);
-    assertIntEquals(intConditionNotEqualOne(6), 54);
-    assertIntEquals(intConditionNotEqualOne(43), 13);
-    assertIntEquals(intConditionEqualZero(6), 54);
-    assertIntEquals(intConditionEqualZero(43), 13);
-    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
-    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
-    assertIntEquals(doubleConditionEqualZero(6.0), 54);
-    assertIntEquals(doubleConditionEqualZero(43.0), 13);
+    assertIntEquals(13, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(13, $noinline$booleanFieldEqualZero());
+    assertIntEquals(54, $noinline$intConditionNotEqualOne(6));
+    assertIntEquals(13, $noinline$intConditionNotEqualOne(43));
+    assertIntEquals(54, $noinline$intConditionEqualZero(6));
+    assertIntEquals(13, $noinline$intConditionEqualZero(43));
+    assertIntEquals(54, $noinline$floatConditionNotEqualOne(6.0f));
+    assertIntEquals(13, $noinline$floatConditionNotEqualOne(43.0f));
+    assertIntEquals(54, $noinline$doubleConditionEqualZero(6.0));
+    assertIntEquals(13, $noinline$doubleConditionEqualZero(43.0));
 
-    assertIntEquals(1234567, intToDoubleToInt(1234567));
-    assertIntEquals(Integer.MIN_VALUE, intToDoubleToInt(Integer.MIN_VALUE));
-    assertIntEquals(Integer.MAX_VALUE, intToDoubleToInt(Integer.MAX_VALUE));
-    assertStringEquals("d=7654321.0, i=7654321", intToDoubleToIntPrint(7654321));
-    assertIntEquals(12, byteToDoubleToInt((byte) 12));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToInt(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToInt(Byte.MAX_VALUE));
-    assertIntEquals(11, floatToDoubleToInt(11.3f));
-    assertStringEquals("d=12.25, i=12", floatToDoubleToIntPrint(12.25f));
-    assertIntEquals(123, byteToDoubleToShort((byte) 123));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToShort(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToShort(Byte.MAX_VALUE));
-    assertIntEquals(1234, charToDoubleToShort((char) 1234));
-    assertIntEquals(Character.MIN_VALUE, charToDoubleToShort(Character.MIN_VALUE));
-    assertIntEquals(/* sign-extended */ -1, charToDoubleToShort(Character.MAX_VALUE));
-    assertIntEquals(12345, floatToIntToShort(12345.75f));
-    assertIntEquals(Short.MAX_VALUE, floatToIntToShort((float)(Short.MIN_VALUE - 1)));
-    assertIntEquals(Short.MIN_VALUE, floatToIntToShort((float)(Short.MAX_VALUE + 1)));
-    assertIntEquals(-54321, intToFloatToInt(-54321));
-    assertDoubleEquals((double) 0x12345678, longToIntToDouble(0x1234567812345678L));
-    assertDoubleEquals(0.0, longToIntToDouble(Long.MIN_VALUE));
-    assertDoubleEquals(-1.0, longToIntToDouble(Long.MAX_VALUE));
-    assertLongEquals(0x0000000012345678L, longToIntToLong(0x1234567812345678L));
-    assertLongEquals(0xffffffff87654321L, longToIntToLong(0x1234567887654321L));
-    assertLongEquals(0L, longToIntToLong(Long.MIN_VALUE));
-    assertLongEquals(-1L, longToIntToLong(Long.MAX_VALUE));
-    assertIntEquals((short) -5678, shortToCharToShort((short) -5678));
-    assertIntEquals(Short.MIN_VALUE, shortToCharToShort(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToCharToShort(Short.MAX_VALUE));
-    assertIntEquals(5678, shortToLongToInt((short) 5678));
-    assertIntEquals(Short.MIN_VALUE, shortToLongToInt(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToLongToInt(Short.MAX_VALUE));
-    assertIntEquals(0x34, shortToCharToByte((short) 0x1234));
-    assertIntEquals(-0x10, shortToCharToByte((short) 0x12f0));
-    assertIntEquals(0, shortToCharToByte(Short.MIN_VALUE));
-    assertIntEquals(-1, shortToCharToByte(Short.MAX_VALUE));
-    assertStringEquals("c=1025, b=1", shortToCharToBytePrint((short) 1025));
-    assertStringEquals("c=1023, b=-1", shortToCharToBytePrint((short) 1023));
-    assertStringEquals("c=65535, b=-1", shortToCharToBytePrint((short) -1));
+    assertIntEquals(1234567, $noinline$intToDoubleToInt(1234567));
+    assertIntEquals(Integer.MIN_VALUE, $noinline$intToDoubleToInt(Integer.MIN_VALUE));
+    assertIntEquals(Integer.MAX_VALUE, $noinline$intToDoubleToInt(Integer.MAX_VALUE));
+    assertStringEquals("d=7654321.0, i=7654321", $noinline$intToDoubleToIntPrint(7654321));
+    assertIntEquals(12, $noinline$byteToDoubleToInt((byte) 12));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToInt(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToInt(Byte.MAX_VALUE));
+    assertIntEquals(11, $noinline$floatToDoubleToInt(11.3f));
+    assertStringEquals("d=12.25, i=12", $noinline$floatToDoubleToIntPrint(12.25f));
+    assertIntEquals(123, $noinline$byteToDoubleToShort((byte) 123));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToShort(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToShort(Byte.MAX_VALUE));
+    assertIntEquals(1234, $noinline$charToDoubleToShort((char) 1234));
+    assertIntEquals(Character.MIN_VALUE, $noinline$charToDoubleToShort(Character.MIN_VALUE));
+    assertIntEquals(/* sign-extended */ -1, $noinline$charToDoubleToShort(Character.MAX_VALUE));
+    assertIntEquals(12345, $noinline$floatToIntToShort(12345.75f));
+    assertIntEquals(Short.MAX_VALUE, $noinline$floatToIntToShort((float)(Short.MIN_VALUE - 1)));
+    assertIntEquals(Short.MIN_VALUE, $noinline$floatToIntToShort((float)(Short.MAX_VALUE + 1)));
+    assertIntEquals(-54321, $noinline$intToFloatToInt(-54321));
+    assertDoubleEquals((double) 0x12345678, $noinline$longToIntToDouble(0x1234567812345678L));
+    assertDoubleEquals(0.0, $noinline$longToIntToDouble(Long.MIN_VALUE));
+    assertDoubleEquals(-1.0, $noinline$longToIntToDouble(Long.MAX_VALUE));
+    assertLongEquals(0x0000000012345678L, $noinline$longToIntToLong(0x1234567812345678L));
+    assertLongEquals(0xffffffff87654321L, $noinline$longToIntToLong(0x1234567887654321L));
+    assertLongEquals(0L, $noinline$longToIntToLong(Long.MIN_VALUE));
+    assertLongEquals(-1L, $noinline$longToIntToLong(Long.MAX_VALUE));
+    assertIntEquals((short) -5678, $noinline$shortToCharToShort((short) -5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToCharToShort(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToCharToShort(Short.MAX_VALUE));
+    assertIntEquals(5678, $noinline$shortToLongToInt((short) 5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToLongToInt(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToLongToInt(Short.MAX_VALUE));
+    assertIntEquals(0x34, $noinline$shortToCharToByte((short) 0x1234));
+    assertIntEquals(-0x10, $noinline$shortToCharToByte((short) 0x12f0));
+    assertIntEquals(0, $noinline$shortToCharToByte(Short.MIN_VALUE));
+    assertIntEquals(-1, $noinline$shortToCharToByte(Short.MAX_VALUE));
+    assertStringEquals("c=1025, b=1", $noinline$shortToCharToBytePrint((short) 1025));
+    assertStringEquals("c=1023, b=-1", $noinline$shortToCharToBytePrint((short) 1023));
+    assertStringEquals("c=65535, b=-1", $noinline$shortToCharToBytePrint((short) -1));
 
-    assertIntEquals(0x21, longAnd0xffToByte(0x1234432112344321L));
-    assertIntEquals(0, longAnd0xffToByte(Long.MIN_VALUE));
-    assertIntEquals(-1, longAnd0xffToByte(Long.MAX_VALUE));
-    assertIntEquals(0x1234, intAnd0x1ffffToChar(0x43211234));
-    assertIntEquals(0, intAnd0x1ffffToChar(Integer.MIN_VALUE));
-    assertIntEquals(Character.MAX_VALUE, intAnd0x1ffffToChar(Integer.MAX_VALUE));
-    assertIntEquals(0x4321, intAnd0x17fffToShort(0x87654321));
-    assertIntEquals(0x0888, intAnd0x17fffToShort(0x88888888));
-    assertIntEquals(0, intAnd0x17fffToShort(Integer.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, intAnd0x17fffToShort(Integer.MAX_VALUE));
+    assertIntEquals(0x21, $noinline$longAnd0xffToByte(0x1234432112344321L));
+    assertIntEquals(0, $noinline$longAnd0xffToByte(Long.MIN_VALUE));
+    assertIntEquals(-1, $noinline$longAnd0xffToByte(Long.MAX_VALUE));
+    assertIntEquals(0x1234, $noinline$intAnd0x1ffffToChar(0x43211234));
+    assertIntEquals(0, $noinline$intAnd0x1ffffToChar(Integer.MIN_VALUE));
+    assertIntEquals(Character.MAX_VALUE, $noinline$intAnd0x1ffffToChar(Integer.MAX_VALUE));
+    assertIntEquals(0x4321, $noinline$intAnd0x17fffToShort(0x87654321));
+    assertIntEquals(0x0888, $noinline$intAnd0x17fffToShort(0x88888888));
+    assertIntEquals(0, $noinline$intAnd0x17fffToShort(Integer.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$intAnd0x17fffToShort(Integer.MAX_VALUE));
 
-    assertDoubleEquals(0.0, shortAnd0xffffToShortToDouble((short) 0));
-    assertDoubleEquals(1.0, shortAnd0xffffToShortToDouble((short) 1));
-    assertDoubleEquals(-2.0, shortAnd0xffffToShortToDouble((short) -2));
-    assertDoubleEquals(12345.0, shortAnd0xffffToShortToDouble((short) 12345));
-    assertDoubleEquals((double)Short.MAX_VALUE, shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
-    assertDoubleEquals((double)Short.MIN_VALUE, shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
+    assertDoubleEquals(0.0, $noinline$shortAnd0xffffToShortToDouble((short) 0));
+    assertDoubleEquals(1.0, $noinline$shortAnd0xffffToShortToDouble((short) 1));
+    assertDoubleEquals(-2.0, $noinline$shortAnd0xffffToShortToDouble((short) -2));
+    assertDoubleEquals(12345.0, $noinline$shortAnd0xffffToShortToDouble((short) 12345));
+    assertDoubleEquals((double)Short.MAX_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
+    assertDoubleEquals((double)Short.MIN_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
 
-    assertIntEquals(intReverseCondition(41), 13);
-    assertIntEquals(intReverseConditionNaN(-5), 13);
+    assertIntEquals(13, $noinline$intReverseCondition(41));
+    assertIntEquals(13, $noinline$intReverseConditionNaN(-5));
 
     for (String condition : new String[] { "Equal", "NotEqual" }) {
       for (String constant : new String[] { "True", "False" }) {
         for (String side : new String[] { "Rhs", "Lhs" }) {
           String name = condition + constant + side;
-          assertIntEquals(runSmaliTest(name, true), 5);
-          assertIntEquals(runSmaliTest(name, false), 3);
+          assertIntEquals(5, $noinline$runSmaliTest(name, true));
+          assertIntEquals(3, $noinline$runSmaliTest(name, false));
         }
       }
     }
+
+    assertIntEquals(0, $noinline$runSmaliTestConst("AddSubConst", 1));
+    assertIntEquals(3, $noinline$runSmaliTestConst("SubAddConst", 2));
+    assertIntEquals(-16, $noinline$runSmaliTestConst("SubSubConst1", 3));
+    assertIntEquals(-5, $noinline$runSmaliTestConst("SubSubConst2", 4));
+    assertIntEquals(26, $noinline$runSmaliTestConst("SubSubConst3", 5));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10 + 128));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
+
+    assertIntEquals(654321, $noinline$intAddSubSimplifyArg1(arg, 654321));
+    assertIntEquals(arg, $noinline$intAddSubSimplifyArg2(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyLeft(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyRight(arg, 654321));
+    assertFloatEquals(654321.125f, $noinline$floatAddSubSimplifyArg1(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatAddSubSimplifyArg2(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyLeft(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyRight(floatArg, 654321.125f));
   }
 
   private static boolean $inline$true() { return true; }
diff --git a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
index 171ade8..2056e2f 100644
--- a/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
+++ b/test/462-checker-inlining-across-dex-files/src-multidex/OtherDex.java
@@ -38,25 +38,25 @@
     return "OtherDex";
   }
 
-  public static Class returnOtherDexClass() {
+  public static Class<?> returnOtherDexClass() {
     return OtherDex.class;
   }
 
-  public static Class returnMainClass() {
+  public static Class<?> returnMainClass() {
     return Main.class;
   }
 
-  private static Class returnOtherDexClass2() {
+  private static Class<?> returnOtherDexClass2() {
     return OtherDex.class;
   }
 
-  public static Class returnOtherDexClassStaticCall() {
+  public static Class<?> returnOtherDexClassStaticCall() {
     // Do not call returnOtherDexClass, as it may have been flagged
     // as non-inlineable.
     return returnOtherDexClass2();
   }
 
-  public static Class returnOtherDexCallingMain() {
+  public static Class<?> returnOtherDexCallingMain() {
     return Main.getOtherClass();
   }
 
diff --git a/test/462-checker-inlining-across-dex-files/src/Main.java b/test/462-checker-inlining-across-dex-files/src/Main.java
index 1fe49a8..c2bb479 100644
--- a/test/462-checker-inlining-across-dex-files/src/Main.java
+++ b/test/462-checker-inlining-across-dex-files/src/Main.java
@@ -106,7 +106,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClass() {
+  public static Class<?> dontInlineOtherDexClass() {
     return OtherDex.returnOtherDexClass();
   }
 
@@ -123,7 +123,7 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineMainClass() {
+  public static Class<?> inlineMainClass() {
     return OtherDex.returnMainClass();
   }
 
@@ -135,7 +135,7 @@
   /// CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  public static Class dontInlineOtherDexClassStaticCall() {
+  public static Class<?> dontInlineOtherDexClassStaticCall() {
     return OtherDex.returnOtherDexClassStaticCall();
   }
 
@@ -152,11 +152,11 @@
   // Note: There are two LoadClass instructions. We obtain the correct
   //       instruction id by matching the Return's input list first.
 
-  public static Class inlineOtherDexCallingMain() {
+  public static Class<?> inlineOtherDexCallingMain() {
     return OtherDex.returnOtherDexCallingMain();
   }
 
-  public static Class getOtherClass() {
+  public static Class<?> getOtherClass() {
     return Main.class;
   }
 
diff --git a/test/471-uninitialized-locals/src/Main.java b/test/471-uninitialized-locals/src/Main.java
index a5b1c48..1ac749e 100644
--- a/test/471-uninitialized-locals/src/Main.java
+++ b/test/471-uninitialized-locals/src/Main.java
@@ -24,8 +24,8 @@
   public static void main(String args[]) throws Exception {
     try {
       Class<?> c = Class.forName("Test");
-      Method m = c.getMethod("ThrowException", (Class[]) null);
-      m.invoke(null, (Object[]) null);
+      Method m = c.getMethod("ThrowException");
+      m.invoke(null);
     } catch (VerifyError e) {
        // Compilation should go fine but we expect the runtime verification to fail.
       return;
diff --git a/test/472-unreachable-if-regression/src/Main.java b/test/472-unreachable-if-regression/src/Main.java
index c9f9511..d426df1 100644
--- a/test/472-unreachable-if-regression/src/Main.java
+++ b/test/472-unreachable-if-regression/src/Main.java
@@ -25,12 +25,12 @@
     System.out.println("Test started.");
     Class<?> c = Class.forName("Test");
 
-    Method unreachableIf = c.getMethod("UnreachableIf", (Class[]) null);
-    unreachableIf.invoke(null, (Object[]) null);
+    Method unreachableIf = c.getMethod("UnreachableIf");
+    unreachableIf.invoke(null);
     System.out.println("Successfully called UnreachableIf().");
 
-    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch", (Class[]) null);
-    unreachablePackedSwitch.invoke(null, (Object[]) null);
+    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch");
+    unreachablePackedSwitch.invoke(null);
     System.out.println("Successfully called UnreachablePackedSwitch().");
   }
 
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index 6fc12f1..c2982b4 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -103,10 +103,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
-        // Try defeating inlining.
-        throw new Error();
-      }
+      // Try defeating inlining.
+      if (doThrow) { throw new Error(); }
     }
   }
 
@@ -181,10 +179,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
         // Try defeating inlining.
-        throw new Error();
-      }
+      if (doThrow) { throw new Error(); }
     }
   }
 
@@ -245,10 +241,8 @@
     static boolean doThrow = false;
 
     static void $noinline$staticMethod() {
-      if (doThrow) {
         // Try defeating inlining.
-        throw new Error();
-      }
+      if (doThrow) { throw new Error(); }
     }
 
     static {
@@ -314,7 +308,7 @@
 
   static void constClassAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreClass(ClassWithClinit7.class);
-    ClassWithClinit7.someStaticMethod(it);
+    ClassWithClinit7.$noinline$someStaticMethod(it);
   }
 
   static void $opt$inline$ignoreClass(Class<?> c) {
@@ -325,10 +319,10 @@
       System.out.println("Main$ClassWithClinit7's static initializer");
     }
 
-    // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable<?> it) {
-      // We're not inlining invoke-interface at the moment.
+    static void $noinline$someStaticMethod(Iterable<?> it) {
       it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
 
@@ -345,7 +339,7 @@
 
   static void sgetAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreInt(ClassWithClinit8.value);
-    ClassWithClinit8.someStaticMethod(it);
+    ClassWithClinit8.$noinline$someStaticMethod(it);
   }
 
   static void $opt$inline$ignoreInt(int i) {
@@ -357,10 +351,10 @@
       System.out.println("Main$ClassWithClinit8's static initializer");
     }
 
-    // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable<?> it) {
-      // We're not inlining invoke-interface at the moment.
+    static void $noinline$someStaticMethod(Iterable<?> it) {
       it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
 
@@ -377,7 +371,7 @@
   static void constClassSgetAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreClass(ClassWithClinit9.class);
     $opt$inline$ignoreInt(ClassWithClinit9.value);
-    ClassWithClinit9.someStaticMethod(it);
+    ClassWithClinit9.$noinline$someStaticMethod(it);
   }
 
   static class ClassWithClinit9 {
@@ -386,10 +380,10 @@
       System.out.println("Main$ClassWithClinit9's static initializer");
     }
 
-    // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable<?> it) {
-      // We're not inlining invoke-interface at the moment.
+    static void $noinline$someStaticMethod(Iterable<?> it) {
       it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
 
@@ -422,8 +416,9 @@
 
     static void inlinedForNull(Iterable<?> it) {
       if (it != null) {
-        // We're not inlining invoke-interface at the moment.
         it.iterator();
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
       }
     }
   }
@@ -460,8 +455,11 @@
     }
 
     static void inlinedForNull(Iterable<?> it) {
-      // We're not inlining invoke-interface at the moment.
       it.iterator();
+      if (it != null) {
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
+      }
     }
   }
 
@@ -494,8 +492,8 @@
 
     static void inlinedForNull(Iterable<?> it) {
       if (it != null) {
-        // We're not inlining invoke-interface at the moment.
-        it.iterator();
+        // We're not inlining throw at the moment.
+        if (doThrow) { throw new Error(""); }
       }
     }
   }
@@ -510,8 +508,9 @@
     }
 
     public static void $noinline$getIterator(Iterable<?> it) {
-      // We're not inlining invoke-interface at the moment.
       it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
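The hunks above switch these helpers to the ART run-test convention for defeating inlining: a $noinline$ name prefix plus an always-false doThrow guard, so each body contains a conditional throw that, per the updated comments, the optimizing compiler's inliner currently refuses to inline. A minimal sketch of the pattern, using a hypothetical helper (not taken from the patch):

    static boolean doThrow = false;

    // The conditional throw keeps the method out of the inliner; the
    // $noinline$ prefix documents that expectation in the method name.
    static void $noinline$helper() {
      if (doThrow) { throw new Error(); }
    }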
 
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index e5171f0..141054d 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -62,7 +62,7 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -71,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -94,10 +94,10 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -109,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -124,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -143,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -165,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
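The CHECK-START annotations above are renamed from dead_code_elimination_final to dead_code_elimination$final (and, in later hunks, dead_code_elimination to dead_code_elimination$initial), matching a pass-naming scheme where repeated instances of a pass carry a $-suffixed qualifier. An illustrative checker annotation in that form, on a hypothetical method (not from the patch):

    /// CHECK-START: int Foo.onlyAdd(int, int) dead_code_elimination$final (after)
    /// CHECK-NOT:                      Phi
    static int onlyAdd(int x, int y) {
      return x + y;
    }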
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index 056f22c..e3617c7 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,7 +129,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -146,7 +146,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -194,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -217,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 019d876..7d5fd4f 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/489-current-method-regression/src/Main.java b/test/489-current-method-regression/src/Main.java
index 7d102f5..285c41d 100644
--- a/test/489-current-method-regression/src/Main.java
+++ b/test/489-current-method-regression/src/Main.java
@@ -23,7 +23,7 @@
     if (a == 42) {
       // The class loading will be seen as dead code by
       // the optimizer.
-      Class c = Main.class;
+      Class<?> c = Main.class;
     }
     return new Main().bar();
   }
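This hunk, like several below, replaces the raw type Class with the wildcard-parameterized Class<?>; runtime behavior is identical, the parameterized form simply avoids raw-type/unchecked warnings. A small self-contained illustration (not from the patch):

    class RawVsWildcard {
      static void demo() {
        Class raw = String.class;      // raw type: triggers a raw-type lint warning
        Class<?> wild = String.class;  // wildcard-parameterized: warning-free, same object
      }
    }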
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 8de6318..15d4dc0 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -69,7 +69,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dexFile != null) {
-          Class clazz = (Class)method.invoke(dexFile, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dexFile, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -107,7 +107,8 @@
                 /* Load and initialize FirstSeenByMyClassLoader */
   /// CHECK:      LoadClass gen_clinit_check:true
                 /* Load and initialize System */
-  /// CHECK-NEXT: LoadClass gen_clinit_check:true
+  // There may be MipsComputeBaseMethodAddress here.
+  /// CHECK:      LoadClass gen_clinit_check:true
   /// CHECK-NEXT: StaticFieldGet
   // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
   /// CHECK:      LoadString
@@ -123,7 +124,7 @@
 public class Main {
   public static void main(String[] args) throws Exception {
     MyClassLoader o = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     m.invoke(null);
   }
diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc
index 50d1a63..1597c4a 100644
--- a/test/497-inlining-and-class-loader/clear_dex_cache.cc
+++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
@@ -44,8 +45,8 @@
   CHECK(array != nullptr);
   mirror::PointerArray* pointer_array = soa.Decode<mirror::PointerArray*>(array);
   for (size_t i = 0; i != num_methods; ++i) {
-    ArtMethod* method = mirror::DexCache::GetElementPtrSize(methods, i, sizeof(void*));
-    pointer_array->SetElementPtrSize(i, method, sizeof(void*));
+    ArtMethod* method = mirror::DexCache::GetElementPtrSize(methods, i, kRuntimePointerSize);
+    pointer_array->SetElementPtrSize(i, method, kRuntimePointerSize);
   }
   return array;
 }
@@ -61,8 +62,8 @@
   CHECK_EQ(methods != nullptr, old != nullptr);
   CHECK_EQ(num_methods, static_cast<size_t>(old->GetLength()));
   for (size_t i = 0; i != num_methods; ++i) {
-    ArtMethod* method = old->GetElementPtrSize<ArtMethod*>(i, sizeof(void*));
-    mirror::DexCache::SetElementPtrSize(methods, i, method, sizeof(void*));
+    ArtMethod* method = old->GetElementPtrSize<ArtMethod*>(i, kRuntimePointerSize);
+    mirror::DexCache::SetElementPtrSize(methods, i, method, kRuntimePointerSize);
   }
 }
 
diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java
index 832b1f0..1e27e77 100644
--- a/test/497-inlining-and-class-loader/src/Main.java
+++ b/test/497-inlining-and-class-loader/src/Main.java
@@ -66,7 +66,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dex, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -92,7 +92,7 @@
 
     MyClassLoader o = new MyClassLoader();
     MyClassLoader.level1ClassLoader = new MyClassLoader();
-    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Class<?> foo = o.loadClass("LoadedByMyClassLoader");
     Method m = foo.getDeclaredMethod("bar");
     try {
       m.invoke(null);
diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt
index fbd93fa..988b220 100644
--- a/test/501-regression-packed-switch/info.txt
+++ b/test/501-regression-packed-switch/info.txt
@@ -1,2 +1,4 @@
 Regression test for the interpreter and optimizing's builder which used
 to trip when compiled code contained a packed switch with no targets.
+Regression test for the arm64 mterp miscalculating the switch table
+address, zero-extending a register instead of sign-extending.
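The added info.txt lines refer to the arm64 mterp forming the packed-switch branch target from a signed 32-bit offset: when that offset is negative (as it is when the switch payload precedes the instruction) and the register is zero-extended rather than sign-extended to 64 bits, the computed address is far off target. A small arithmetic sketch of the difference (illustration only; the -8 offset is made up):

    int offset = -8;                           // payload located before the switch instruction
    long signExtended = (long) offset;         // -8: the intended delta
    long zeroExtended = offset & 0xFFFFFFFFL;  // 4294967288: a bogus delta of nearly 4 GiB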
diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali
index 8756ed5..5a760c7 100644
--- a/test/501-regression-packed-switch/smali/Test.smali
+++ b/test/501-regression-packed-switch/smali/Test.smali
@@ -27,3 +27,28 @@
   .packed-switch 0x0
   .end packed-switch
 .end method
+
+.method public static PackedSwitchAfterData(I)I
+  .registers 1
+  goto :pswitch_instr
+
+  :case0
+  const/4 v0, 0x1
+  return v0
+
+  :pswitch_data
+  .packed-switch 0x0
+    :case0
+    :case1
+  .end packed-switch
+
+  :pswitch_instr
+  packed-switch v0, :pswitch_data
+  const/4 v0, 0x7
+  return v0
+
+  :case1
+  const/4 v0, 0x4
+  return v0
+
+.end method
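The new PackedSwitchAfterData method deliberately places the packed-switch payload before the packed-switch instruction, so the payload offset encoded in the instruction is negative; that is the case the sign-extension fix described in info.txt needs to cover. A rough Java equivalent of the smali (illustration only):

    static int packedSwitchAfterData(int v) {
      switch (v) {
        case 0:  return 1;   // :case0
        case 1:  return 4;   // :case1
        default: return 7;   // fall-through after the packed-switch
      }
    }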
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index b80bc62..74c081a 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -24,10 +24,15 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("EmptyPackedSwitch", new Class[] { int.class });
+    Method m = c.getMethod("EmptyPackedSwitch", int.class);
     Integer result = (Integer) m.invoke(null, new Integer(42));
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
     }
+    m = c.getMethod("PackedSwitchAfterData", int.class);
+    result = (Integer) m.invoke(null, new Integer(0));
+    if (result != 1) {
+      throw new Error("Expected 1, got " + result);
+    }
   }
 }
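The reflection lookups here and in later hunks are simplified because Class.getMethod is declared with a Class<?>... vararg parameter, making the explicit Class[] wrapper redundant. A self-contained illustration with a real method (not from the patch):

    import java.lang.reflect.Method;

    class GetMethodDemo {
      public static void main(String[] args) throws Exception {
        Class<?> c = Integer.class;
        // Equivalent lookups of Integer.parseInt(String):
        Method viaArray  = c.getMethod("parseInt", new Class[] { String.class });
        Method viaVararg = c.getMethod("parseInt", String.class);
        System.out.println(viaArray.equals(viaVararg));  // true
      }
    }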
diff --git a/test/504-regression-baseline-entry/src/Main.java b/test/504-regression-baseline-entry/src/Main.java
index 2c9df28..284cbdc 100644
--- a/test/504-regression-baseline-entry/src/Main.java
+++ b/test/504-regression-baseline-entry/src/Main.java
@@ -24,7 +24,7 @@
 
   public static void main(String args[]) throws Exception {
     Class<?> c = Class.forName("Test");
-    Method m = c.getMethod("SingleGotoStart", (Class[]) null);
+    Method m = c.getMethod("SingleGotoStart");
     Integer result = (Integer) m.invoke(null);
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 733a1dd..b0bffa5 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -1360,7 +1360,7 @@
 # Test that a throw-catch loop on monitor-exit is eliminated.
 # Note that we do not test this until after DCE which merges trivially split blocks.
 
-## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination (after)
+## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:      flags "catch_block"
 ## CHECK-NOT:  end_block
 ## CHECK:      MonitorOperation kind:exit
diff --git a/test/510-checker-try-catch/src/Main.java b/test/510-checker-try-catch/src/Main.java
index 25cdc0e..d6dcd30 100644
--- a/test/510-checker-try-catch/src/Main.java
+++ b/test/510-checker-try-catch/src/Main.java
@@ -39,7 +39,7 @@
 
   public static void testMethod(String method) throws Exception {
     Class<?> c = Class.forName("Runtime");
-    Method m = c.getMethod(method, new Class[] { boolean.class, boolean.class });
+    Method m = c.getMethod(method, boolean.class, boolean.class);
 
     for (TestPath path : TestPath.values()) {
       Object[] arguments = new Object[] { path.arg1, path.arg2 };
diff --git a/test/517-checker-builder-fallthrough/src/Main.java b/test/517-checker-builder-fallthrough/src/Main.java
index 23d94e6..14170f5 100644
--- a/test/517-checker-builder-fallthrough/src/Main.java
+++ b/test/517-checker-builder-fallthrough/src/Main.java
@@ -20,7 +20,7 @@
 
   public static int runTest(int input) throws Exception {
     Class<?> c = Class.forName("TestCase");
-    Method m = c.getMethod("testCase", new Class[] { int.class });
+    Method m = c.getMethod("testCase", int.class);
     return (Integer) m.invoke(null, input);
   }
 
diff --git a/test/522-checker-regression-monitor-exit/smali/Test.smali b/test/522-checker-regression-monitor-exit/smali/Test.smali
index c8e9198..72583d2 100644
--- a/test/522-checker-regression-monitor-exit/smali/Test.smali
+++ b/test/522-checker-regression-monitor-exit/smali/Test.smali
@@ -17,11 +17,11 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (before)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (before)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
-## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination (after)
+## CHECK-START: int Test.synchronizedHashCode(java.lang.Object) dead_code_elimination$initial (after)
 ## CHECK:         MonitorOperation [<<Param:l\d+>>] kind:enter
 ## CHECK:         MonitorOperation [<<Param>>]      kind:exit
 
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index c85ac96..a5e9512 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -40,7 +40,7 @@
       Integer result;
       try {
         Class<?> c = Class.forName("Test");
-        Method m = c.getMethod("synchronizedHashCode", new Class[] { Object.class });
+        Method m = c.getMethod("synchronizedHashCode", Object.class);
         result = (Integer) m.invoke(null, m_obj);
       } catch (Exception e) {
         System.err.println("Hash code query exception");
diff --git a/test/525-checker-arrays-and-fields/info.txt b/test/525-checker-arrays-and-fields/info.txt
deleted file mode 100644
index 3e16abf..0000000
--- a/test/525-checker-arrays-and-fields/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Test on (in)variant static and instance field and array references in loops.
diff --git a/test/525-checker-arrays-and-fields/src/Main.java b/test/525-checker-arrays-and-fields/src/Main.java
deleted file mode 100644
index a635a51..0000000
--- a/test/525-checker-arrays-and-fields/src/Main.java
+++ /dev/null
@@ -1,1099 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Test on (in)variant static and instance field and array references in loops.
-//
-public class Main {
-
-  private static Object anObject = new Object();
-  private static Object anotherObject = new Object();
-
-  //
-  // Static fields.
-  //
-
-  private static boolean sZ;
-  private static byte sB;
-  private static char sC;
-  private static short sS;
-  private static int sI;
-  private static long sJ;
-  private static float sF;
-  private static double sD;
-  private static Object sL;
-
-  //
-  // Static arrays.
-  //
-
-  private static boolean[] sArrZ;
-  private static byte[] sArrB;
-  private static char[] sArrC;
-  private static short[] sArrS;
-  private static int[] sArrI;
-  private static long[] sArrJ;
-  private static float[] sArrF;
-  private static double[] sArrD;
-  private static Object[] sArrL;
-
-  //
-  // Instance fields.
-  //
-
-  private boolean mZ;
-  private byte mB;
-  private char mC;
-  private short mS;
-  private int mI;
-  private long mJ;
-  private float mF;
-  private double mD;
-  private Object mL;
-
-  //
-  // Instance arrays.
-  //
-
-  private boolean[] mArrZ;
-  private byte[] mArrB;
-  private char[] mArrC;
-  private short[] mArrS;
-  private int[] mArrI;
-  private long[] mArrJ;
-  private float[] mArrF;
-  private double[] mArrD;
-  private Object[] mArrL;
-
-  //
-  // Loops on static arrays with invariant static field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopB() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopB() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopC() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopC() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopS() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopS() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopI() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopI() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopF() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopF() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopD() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopD() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopL() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopL() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-    }
-  }
-
-  //
-  // Loops on static arrays with variant static field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private static void SVarLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-      if (i == 10)
-        sZ = !sZ;
-    }
-  }
-
-  private static void SVarLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-      if (i == 10)
-        sB++;
-    }
-  }
-
-  private static void SVarLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-      if (i == 10)
-        sC++;
-    }
-  }
-
-  private static void SVarLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-      if (i == 10)
-        sS++;
-    }
-  }
-
-  private static void SVarLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-      if (i == 10)
-        sI++;
-    }
-  }
-
-  private static void SVarLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-      if (i == 10)
-        sJ++;
-    }
-  }
-
-  private static void SVarLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-      if (i == 10)
-        sF++;
-    }
-  }
-
-  private static void SVarLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-      if (i == 10)
-        sD++;
-    }
-  }
-
-  private static void SVarLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-      if (i == 10)
-        sL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on static arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = !sArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = (byte)(sArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = (char)(sArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = (short)(sArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with invariant instance field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.InvLoopZ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopZ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopB() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopB() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopC() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopC() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopS() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopS() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopI() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopI() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopJ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopJ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopF() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopF() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopD() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopD() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopL() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopL() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-    }
-  }
-
-  //
-  // Loops on instance arrays with variant instance field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private void VarLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-      if (i == 10)
-        mZ = !mZ;
-    }
-  }
-
-  private void VarLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-      if (i == 10)
-        mB++;
-    }
-  }
-
-  private void VarLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-      if (i == 10)
-        mC++;
-    }
-  }
-
-  private void VarLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-      if (i == 10)
-        mS++;
-    }
-  }
-
-  private void VarLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-      if (i == 10)
-        mI++;
-    }
-  }
-
-  private void VarLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-      if (i == 10)
-        mJ++;
-    }
-  }
-
-  private void VarLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-      if (i == 10)
-        mF++;
-    }
-  }
-
-  private void VarLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-      if (i == 10)
-        mD++;
-    }
-  }
-
-  private void VarLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-      if (i == 10)
-        mL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = !mArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = (byte)(mArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = (char)(mArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = (short)(mArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Driver and testers.
-  //
-
-  public static void main(String[] args) {
-    DoStaticTests();
-    new Main().DoInstanceTests();
-  }
-
-  private static void DoStaticTests() {
-    // Type Z.
-    sZ = true;
-    sArrZ = new boolean[100];
-    SInvLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(true, sArrZ[i]);
-    }
-    SVarLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 10, sArrZ[i]);
-    }
-    SCrossOverLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 20, sArrZ[i]);
-    }
-    // Type B.
-    sB = 1;
-    sArrB = new byte[100];
-    SInvLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(1, sArrB[i]);
-    }
-    SVarLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
-    }
-    SCrossOverLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, sArrB[i]);
-    }
-    // Type C.
-    sC = 2;
-    sArrC = new char[100];
-    SInvLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(2, sArrC[i]);
-    }
-    SVarLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
-    }
-    SCrossOverLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, sArrC[i]);
-    }
-    // Type S.
-    sS = 3;
-    sArrS = new short[100];
-    SInvLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(3, sArrS[i]);
-    }
-    SVarLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
-    }
-    SCrossOverLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, sArrS[i]);
-    }
-    // Type I.
-    sI = 4;
-    sArrI = new int[100];
-    SInvLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(4, sArrI[i]);
-    }
-    SVarLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
-    }
-    SCrossOverLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, sArrI[i]);
-    }
-    // Type J.
-    sJ = 5;
-    sArrJ = new long[100];
-    SInvLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(5, sArrJ[i]);
-    }
-    SVarLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
-    }
-    SCrossOverLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, sArrJ[i]);
-    }
-    // Type F.
-    sF = 6.0f;
-    sArrF = new float[100];
-    SInvLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(6, sArrF[i]);
-    }
-    SVarLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
-    }
-    SCrossOverLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, sArrF[i]);
-    }
-    // Type D.
-    sD = 7.0;
-    sArrD = new double[100];
-    SInvLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(7.0, sArrD[i]);
-    }
-    SVarLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
-    }
-    SCrossOverLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, sArrD[i]);
-    }
-    // Type L.
-    sL = anObject;
-    sArrL = new Object[100];
-    SInvLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(anObject, sArrL[i]);
-    }
-    SVarLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
-    }
-    SCrossOverLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
-    }
-  }
-
-  private void DoInstanceTests() {
-    // Type Z.
-    mZ = true;
-    mArrZ = new boolean[100];
-    InvLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(true, mArrZ[i]);
-    }
-    VarLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 10, mArrZ[i]);
-    }
-    CrossOverLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 20, mArrZ[i]);
-    }
-    // Type B.
-    mB = 1;
-    mArrB = new byte[100];
-    InvLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(1, mArrB[i]);
-    }
-    VarLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
-    }
-    CrossOverLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, mArrB[i]);
-    }
-    // Type C.
-    mC = 2;
-    mArrC = new char[100];
-    InvLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(2, mArrC[i]);
-    }
-    VarLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
-    }
-    CrossOverLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, mArrC[i]);
-    }
-    // Type S.
-    mS = 3;
-    mArrS = new short[100];
-    InvLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(3, mArrS[i]);
-    }
-    VarLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
-    }
-    CrossOverLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, mArrS[i]);
-    }
-    // Type I.
-    mI = 4;
-    mArrI = new int[100];
-    InvLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(4, mArrI[i]);
-    }
-    VarLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
-    }
-    CrossOverLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, mArrI[i]);
-    }
-    // Type J.
-    mJ = 5;
-    mArrJ = new long[100];
-    InvLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(5, mArrJ[i]);
-    }
-    VarLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
-    }
-    CrossOverLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, mArrJ[i]);
-    }
-    // Type F.
-    mF = 6.0f;
-    mArrF = new float[100];
-    InvLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(6, mArrF[i]);
-    }
-    VarLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
-    }
-    CrossOverLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, mArrF[i]);
-    }
-    // Type D.
-    mD = 7.0;
-    mArrD = new double[100];
-    InvLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(7.0, mArrD[i]);
-    }
-    VarLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
-    }
-    CrossOverLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, mArrD[i]);
-    }
-    // Type L.
-    mL = anObject;
-    mArrL = new Object[100];
-    InvLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(anObject, mArrL[i]);
-    }
-    VarLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
-    }
-    CrossOverLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
-    }
-  }
-
-  private static void expectEquals(boolean expected, boolean result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(byte expected, byte result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(char expected, char result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(short expected, short result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(int expected, int result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(float expected, float result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(double expected, double result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(Object expected, Object result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-}
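The monolithic 525-checker-arrays-and-fields test deleted above is split: the 525-checker-arrays-fields1 test added below keeps the static field and array cases (per its new info.txt), and the instance cases presumably move to a companion test not shown here. The property under test is unchanged, namely LICM hoisting of loop-invariant field reads, roughly:

    // The StaticFieldGet of the invariant field sI moves from inside the loop
    // (loop:{{B\d+}}) to loop:none after the licm pass, per the checker lines.
    private static int sI;
    private static int[] sArrI;

    private static void invLoopI() {
      for (int i = 0; i < sArrI.length; i++) {
        sArrI[i] = sI;
      }
    }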
diff --git a/test/525-checker-arrays-fields1/expected.txt b/test/525-checker-arrays-fields1/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields1/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields1/info.txt b/test/525-checker-arrays-fields1/info.txt
new file mode 100644
index 0000000..7d0a088
--- /dev/null
+++ b/test/525-checker-arrays-fields1/info.txt
@@ -0,0 +1 @@
+Test on (in)variant static field and array references in loops.
diff --git a/test/525-checker-arrays-fields1/src/Main.java b/test/525-checker-arrays-fields1/src/Main.java
new file mode 100644
index 0000000..ba0476a
--- /dev/null
+++ b/test/525-checker-arrays-fields1/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant static field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Static fields.
+  //
+
+  private static boolean sZ;
+  private static byte sB;
+  private static char sC;
+  private static short sS;
+  private static int sI;
+  private static long sJ;
+  private static float sF;
+  private static double sD;
+  private static Object sL;
+
+  //
+  // Static arrays.
+  //
+
+  private static boolean[] sArrZ;
+  private static byte[] sArrB;
+  private static char[] sArrC;
+  private static short[] sArrS;
+  private static int[] sArrI;
+  private static long[] sArrJ;
+  private static float[] sArrF;
+  private static double[] sArrD;
+  private static Object[] sArrL;
+
+  //
+  // Loops on static arrays with invariant static field references.
+  // The checker is used to ensure hoisting occurred.
+  //
+
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+    }
+  }
+
+  //
+  // Loops on static arrays with variant static field references.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  //
+
+  private static void VarLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+      if (i == 10)
+        sZ = !sZ;
+    }
+  }
+
+  private static void VarLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+      if (i == 10)
+        sB++;
+    }
+  }
+
+  private static void VarLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+      if (i == 10)
+        sC++;
+    }
+  }
+
+  private static void VarLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+      if (i == 10)
+        sS++;
+    }
+  }
+
+  private static void VarLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+      if (i == 10)
+        sI++;
+    }
+  }
+
+  private static void VarLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+      if (i == 10)
+        sJ++;
+    }
+  }
+
+  private static void VarLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+      if (i == 10)
+        sF++;
+    }
+  }
+
+  private static void VarLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+      if (i == 10)
+        sD++;
+    }
+  }
+
+  private static void VarLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+      if (i == 10)
+        sL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on static arrays with a cross-over reference.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  // In addition, the checker is used to detect no hoisting.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopZ() {
+    sArrZ[20] = false;
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = !sArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopB() {
+    sArrB[20] = 11;
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = (byte)(sArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopC() {
+    sArrC[20] = 11;
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = (char)(sArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopS() {
+    sArrS[20] = 11;
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = (short)(sArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopI() {
+    sArrI[20] = 11;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopJ() {
+    sArrJ[20] = 11;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopF() {
+    sArrF[20] = 11;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopD() {
+    sArrD[20] = 11;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopL() {
+    sArrL[20] = anotherObject;
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on static arrays with data types (I/F and J/D) that used
+  // to be aliased in an older version of the compiler. This alias has been removed,
+  // however, which enables hoisting the invariant array reference.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop1() {
+    sArrF[20] = -1;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = (int) sArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop2() {
+    sArrI[20] = -2;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop3() {
+    sArrD[20] = -3;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = (long) sArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop4() {
+    sArrJ[20] = -4;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    DoStaticTests();
+    System.out.println("passed");
+  }
+
+  private static void DoStaticTests() {
+    // Type Z.
+    sZ = true;
+    sArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(true, sArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 10, sArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 20, sArrZ[i]);
+    }
+    // Type B.
+    sB = 1;
+    sArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(1, sArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrB[i]);
+    }
+    // Type C.
+    sC = 2;
+    sArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(2, sArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrC[i]);
+    }
+    // Type S.
+    sS = 3;
+    sArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(3, sArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrS[i]);
+    }
+    // Type I.
+    sI = 4;
+    sArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(4, sArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrI[i]);
+    }
+    // Type J.
+    sJ = 5;
+    sArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(5, sArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrJ[i]);
+    }
+    // Type F.
+    sF = 6.0f;
+    sArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(6, sArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrF[i]);
+    }
+    // Type D.
+    sD = 7.0;
+    sArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(7.0, sArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrD[i]);
+    }
+    // Type L.
+    sL = anObject;
+    sArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(anObject, sArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(-3, sArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(-4, sArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(-5, sArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(-6, sArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/525-checker-arrays-fields2/expected.txt b/test/525-checker-arrays-fields2/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields2/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields2/info.txt b/test/525-checker-arrays-fields2/info.txt
new file mode 100644
index 0000000..3464e54
--- /dev/null
+++ b/test/525-checker-arrays-fields2/info.txt
@@ -0,0 +1 @@
+Test on (in)variant instance field and array references in loops.
diff --git a/test/525-checker-arrays-fields2/src/Main.java b/test/525-checker-arrays-fields2/src/Main.java
new file mode 100644
index 0000000..2aa40fc
--- /dev/null
+++ b/test/525-checker-arrays-fields2/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant instance field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Instance fields.
+  //
+
+  private boolean mZ;
+  private byte mB;
+  private char mC;
+  private short mS;
+  private int mI;
+  private long mJ;
+  private float mF;
+  private double mD;
+  private Object mL;
+
+  //
+  // Instance arrays.
+  //
+
+  private boolean[] mArrZ;
+  private byte[] mArrB;
+  private char[] mArrC;
+  private short[] mArrS;
+  private int[] mArrI;
+  private long[] mArrJ;
+  private float[] mArrF;
+  private double[] mArrD;
+  private Object[] mArrL;
+
+  //
+  // Loops on instance arrays with invariant instance field references.
+  // The checker is used to ensure hoisting occurred.
+  //
+
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+    }
+  }
+
+  //
+  // Loops on instance arrays with variant instance field references.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  //
+
+  private void VarLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+      if (i == 10)
+        mZ = !mZ;
+    }
+  }
+
+  private void VarLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+      if (i == 10)
+        mB++;
+    }
+  }
+
+  private void VarLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+      if (i == 10)
+        mC++;
+    }
+  }
+
+  private void VarLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+      if (i == 10)
+        mS++;
+    }
+  }
+
+  private void VarLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+      if (i == 10)
+        mI++;
+    }
+  }
+
+  private void VarLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+      if (i == 10)
+        mJ++;
+    }
+  }
+
+  private void VarLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+      if (i == 10)
+        mF++;
+    }
+  }
+
+  private void VarLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+      if (i == 10)
+        mD++;
+    }
+  }
+
+  private void VarLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+      if (i == 10)
+        mL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on instance arrays with a cross-over reference.
+  // Incorrect hoisting is detected by an incorrect outcome.
+  // In addition, the checker is used to detect no hoisting.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopZ() {
+    mArrZ[20] = false;
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = !mArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopB() {
+    mArrB[20] = 111;
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = (byte)(mArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopC() {
+    mArrC[20] = 111;
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = (char)(mArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopS() {
+    mArrS[20] = 111;
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = (short)(mArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopI() {
+    mArrI[20] = 111;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopJ() {
+    mArrJ[20] = 111;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopF() {
+    mArrF[20] = 111;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopD() {
+    mArrD[20] = 111;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopL() {
+    mArrL[20] = anotherObject;
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on instance arrays with data types (I/F and J/D) that used
+  // to be aliased in an older version of the compiler. This alias has been removed,
+  // however, which enables hoisting the invariant array reference.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop1() {
+    mArrF[20] = -1;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = (int) mArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop2() {
+    mArrI[20] = -2;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop3() {
+    mArrD[20] = -3;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = (long) mArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop4() {
+    mArrJ[20] = -4;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    new Main().DoInstanceTests();
+    System.out.println("passed");
+  }
+
+  private void DoInstanceTests() {
+    // Type Z.
+    mZ = true;
+    mArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(true, mArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 10, mArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 20, mArrZ[i]);
+    }
+    // Type B.
+    mB = 1;
+    mArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(1, mArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrB[i]);
+    }
+    // Type C.
+    mC = 2;
+    mArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(2, mArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrC[i]);
+    }
+    // Type S.
+    mS = 3;
+    mArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(3, mArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrS[i]);
+    }
+    // Type I.
+    mI = 4;
+    mArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(4, mArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrI[i]);
+    }
+    // Type J.
+    mJ = 5;
+    mArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(5, mArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrJ[i]);
+    }
+    // Type F.
+    mF = 6.0f;
+    mArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(6, mArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrF[i]);
+    }
+    // Type D.
+    mD = 7.0;
+    mArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(7.0, mArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrD[i]);
+    }
+    // Type L.
+    mL = anObject;
+    mArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(anObject, mArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(-3, mArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(-4, mArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(-5, mArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(-6, mArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index ead9446..9435ef1 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -34,9 +34,21 @@
   /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
 
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:           <<Array:l\d+>>         NullCheck
+  /// CHECK:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                              IntermediateAddress
+  /// CHECK:                                  ArrayGet [<<Array>>,<<Index>>]
+
   public static int constantIndexGet(int array[]) {
     return array[1];
   }
@@ -55,10 +67,23 @@
   /// CHECK:             <<Const2:i\d+>>        IntConstant 2
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
 
 
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
   public static void constantIndexSet(int array[]) {
     array[1] = 2;
   }
@@ -76,7 +101,20 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
+
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (after)
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
 
   public static int get(int array[], int index) {
@@ -102,7 +140,26 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (before)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (after)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
 
   public static void set(int array[], int index, int value) {
@@ -126,23 +183,53 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
   public static void getSet(int array[], int index) {
     array[index] = array[index] + 1;
   }
@@ -166,23 +253,57 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   public static int[] accrossGC(int array[], int index) {
@@ -196,6 +317,14 @@
    * Test that the intermediate address is shared between array accesses after
    * the bounds checks have been removed by BCE.
    */
+  // For checker tests `instruction_simplifier_<arch> (after)` below, by the time we reach
+  // the architecture-specific instruction simplifier, BCE has removed the bounds checks in
+  // the loop.
+
+  // Note that we do not care that the `DataOffset` is `12`. But if we do not
+  // specify it and any other `IntConstant` appears before that instruction,
+  // checker will match the previous `IntConstant`, and we will thus fail the
+  // check.
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
   /// CHECK:             <<Const1:i\d+>>        IntConstant 1
@@ -207,14 +336,6 @@
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
 
-  // By the time we reach the architecture-specific instruction simplifier, BCE
-  // has removed the bounds checks in the loop.
-
-  // Note that we do not care that the `DataOffset` is `12`. But if we do not
-  // specify it and any other `IntConstant` appears before that instruction,
-  // checker will match the previous `IntConstant`, and we will thus fail the
-  // check.
-
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
@@ -222,23 +343,60 @@
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
   public static int canMergeAfterBCE1() {
@@ -279,15 +437,15 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
-  /// CHECK-DAG:         <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:             <<Address3:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
@@ -295,7 +453,7 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
@@ -303,9 +461,56 @@
 
   // There should be only one intermediate address computation in the loop.
 
-  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
-  /// CHECK:                                    Arm64IntermediateAddress
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
 
   public static int canMergeAfterBCE2() {
     int[] array = {0, 1, 2, 3};
@@ -315,6 +520,37 @@
     return array[array.length - 1];
   }
 
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-NOT:                                IntermediateAddress
+  public static int checkLongFloatDouble() {
+    long[] array_long = {0, 1, 2, 3};
+    float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0};
+    double[] array_double = {0.0, 1.0, 2.0, 3.0};
+    double s = 0.0;
+
+    for (int i = 0; i < 4; i++) {
+      s += (double)array_long[i] + (double)array_float[i] + array_double[i];
+    }
+    return (int)s;
+  }
 
   public static void main(String[] args) {
     int[] array = {123, 456, 789};
@@ -337,5 +573,7 @@
 
     assertIntEquals(4, canMergeAfterBCE1());
     assertIntEquals(6, canMergeAfterBCE2());
+
+    assertIntEquals(18, checkLongFloatDouble());
   }
 }
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 872fa6d..7b5cbc1 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -77,6 +77,16 @@
     expectEquals(123456789123456789f, UnresolvedClass.staticFloat);
     expectEquals(123456789123456789d, UnresolvedClass.staticDouble);
     expectEquals(o, UnresolvedClass.staticObject);
+
+    // Check "large" values.
+
+    UnresolvedClass.staticByte = (byte)-1;
+    UnresolvedClass.staticChar = (char)32768;
+    UnresolvedClass.staticInt = -1;
+
+    expectEquals((byte)-1, UnresolvedClass.staticByte);
+    expectEquals((char)32768, UnresolvedClass.staticChar);
+    expectEquals(-1, UnresolvedClass.staticInt);
   }
 
   /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before)
@@ -114,6 +124,33 @@
     expectEquals(o, c.instanceObject);
   }
 
+  /// CHECK-START: void Main.callUnresolvedNull(UnresolvedClass) register (before)
+  /// CHECK-NOT: NullCheck
+  static public void callUnresolvedNull(UnresolvedClass c) {
+    int x = 0;
+    try {
+      x = c.instanceInt;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-1, x);
+    try {
+      c.instanceInt = -1;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-2, x);
+    try {
+      c.virtualMethod();
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-3, x);
+  }
+
   static public void testInstanceOf(Object o) {
     if (o instanceof UnresolvedSuperClass) {
       System.out.println("instanceof ok");
@@ -136,6 +173,7 @@
     callInvokeUnresolvedSuper(m);
     callUnresolvedStaticFieldAccess();
     callUnresolvedInstanceFieldAccess(c);
+    callUnresolvedNull(null);
     testInstanceOf(m);
     testCheckCast(m);
     testLicm(2);
@@ -185,7 +223,7 @@
     }
   }
 
-    public static void expectEquals(float expected, float result) {
+  public static void expectEquals(float expected, float result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
diff --git a/test/530-checker-loops1/src/Main.java b/test/530-checker-loops1/src/Main.java
index 948a7b7..dde4d62 100644
--- a/test/530-checker-loops1/src/Main.java
+++ b/test/530-checker-loops1/src/Main.java
@@ -562,7 +562,7 @@
   //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnTwoArrayLengths(int n) {
     int[] a = new int[n];
     for (int i = 0; i < a.length; i++) {
@@ -604,7 +604,7 @@
   //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnParameter(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
@@ -619,56 +619,56 @@
     }
   }
 
-  /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (before)
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   //
-  /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (after)
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
-  private static void linearTriangularVariationsInnerStrict(int n) {
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictLower(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
         a[j] += 1;
       }
-      for (int j = i - 1; j > -1; j--) {
+      for (int j = i - 1; j >= 0; j--) {
         a[j] += 1;
       }
       for (int j = i; j < n; j++) {
         a[j] += 1;
       }
-      for (int j = n - 1; j > i - 1; j--) {
+      for (int j = n - 1; j >= i; j--) {
         a[j] += 1;
       }
     }
     verifyTriangular(a);
   }
 
-  /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (before)
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   //
-  /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (after)
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
-  private static void linearTriangularVariationsInnerNonStrict(int n) {
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictUpper(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
-      for (int j = 0; j <= i - 1; j++) {
+      for (int j = 0; j <= i; j++) {
         a[j] += 1;
       }
-      for (int j = i - 1; j >= 0; j--) {
+      for (int j = i; j >= 0; j--) {
         a[j] += 1;
       }
-      for (int j = i; j <= n - 1; j++) {
+      for (int j = i + 1; j < n; j++) {
         a[j] += 1;
       }
-      for (int j = n - 1; j >= i; j--) {
+      for (int j = n - 1; j >= i + 1; j--) {
         a[j] += 1;
       }
     }
@@ -802,8 +802,8 @@
     linearTriangularOnTwoArrayLengths(10);
     linearTriangularOnOneArrayLength(10);
     linearTriangularOnParameter(10);
-    linearTriangularVariationsInnerStrict(10);
-    linearTriangularVariationsInnerNonStrict(10);
+    linearTriangularStrictLower(10);
+    linearTriangularStrictUpper(10);
     {
       int[] t = linearTriangularOOB();
       for (int i = 0; i < 200; i++) {
diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java
index c644692..7acf008 100644
--- a/test/530-checker-loops2/src/Main.java
+++ b/test/530-checker-loops2/src/Main.java
@@ -31,7 +31,7 @@
   //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void bubble(int[] a) {
     for (int i = a.length; --i >= 0;) {
       for (int j = 0; j < i; j++) {
@@ -301,6 +301,53 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: void Main.justRightTriangular1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular1() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 5; i <= Integer.MIN_VALUE + 10; i++) {
+      for (int j = Integer.MIN_VALUE + 4; j < i - 5; j++) {
+        sResult += a[j - (Integer.MIN_VALUE + 4)];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justRightTriangular2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular2() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 5; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justOOBTriangular() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void justOOBTriangular() {
+    int[] a = { 1 };
+    for (int i = Integer.MIN_VALUE + 4; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
   /// CHECK-START: void Main.hiddenOOB1(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -315,7 +362,6 @@
       // Dangerous loop where careless static range analysis would yield strict upper bound
       // on index j of 5. When, for instance, lo and thus i = -2147483648, the upper bound
       // becomes really positive due to arithmetic wrap-around, causing OOB.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = 4; j < i - 5; j++) {
         sResult += a[j - 4];
       }
@@ -336,13 +382,32 @@
       // Dangerous loop where careless static range analysis would yield strict lower bound
       // on index j of 5. When, for instance, hi and thus i = 2147483647, the upper bound
       // becomes really negative due to arithmetic wrap-around, causing OOB.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = 6; j > i + 5; j--) {
         sResult += a[j - 6];
       }
     }
   }
 
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenOOB3(int hi) {
+    int[] a = { 11 };
+    for (int i = -1; i <= hi; i++) {
+      // Dangerous loop where careless static range analysis would yield strict lower bound
+      // on index j of 0. For large i, the initial value of j becomes really negative due
+      // to arithmetic wrap-around, causing OOB.
+      for (int j = i + 1; j < 1; j++) {
+        sResult += a[j];
+      }
+    }
+  }
+
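For reference, a minimal stand-alone sketch (plain Java, hypothetical, not part of this test) of the wrap-around these "hidden OOB" loops rely on: once `i` reaches `Integer.MAX_VALUE`, `i + 1` silently wraps to `Integer.MIN_VALUE`, so an index bound that looks safe statically is violated at run time.

class WrapAroundSketch {
  public static void main(String[] args) {
    int[] a = { 11 };
    int i = Integer.MAX_VALUE;   // reached when hiddenOOB3 is called with hi = 2147483647
    int j = i + 1;               // wraps around to Integer.MIN_VALUE
    System.out.println(j);       // -2147483648
    System.out.println(j < 1);   // true, so the inner loop body would execute
    System.out.println(a[j]);    // throws ArrayIndexOutOfBoundsException
  }
}
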
   /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -376,7 +441,6 @@
     for (int i = -1; i <= 0; i++) {
       // Dangerous loop, similar to the one above, where the loop is now finite, but the
       // loop still goes out of bounds for i = -1 due to the large upper bound.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = -4; j < 2147483646 * i - 3; j++) {
         sResult += a[j + 4];
       }
@@ -432,6 +496,25 @@
     }
   }
 
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static int doNotHoist(int[] a) {
+    int n = a.length;
+    int x = 0;
+    // BCE applies, but hoisting the access out of the guard would cause a crash.
+    for (int i = -10000; i < 10000; i++) {
+      for (int j = 0; j <= 1; j++) {
+        if (0 <= i && i < n)
+          x += a[i];
+      }
+    }
+    return x;
+  }
+
+
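A hedged sketch of why hoisting must not happen in `doNotHoist` (hand-written "broken" variant, hypothetical, not compiler output): if the load of `a[i]` were hoisted above the `0 <= i && i < n` guard, the very first iteration would already read `a[-10000]` and throw.

class HoistSketch {
  // Incorrectly hoisted variant of doNotHoist: throws ArrayIndexOutOfBoundsException
  // immediately, because a[i] is read before the range guard is evaluated.
  static int doNotHoistBroken(int[] a) {
    int n = a.length;
    int x = 0;
    for (int i = -10000; i < 10000; i++) {
      int v = a[i];                // hoisted load: i == -10000 on the first iteration
      for (int j = 0; j <= 1; j++) {
        if (0 <= i && i < n) {
          x += v;
        }
      }
    }
    return x;
  }
}
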
   /// CHECK-START: int[] Main.add() BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -687,7 +770,7 @@
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
   //  Order matters:
   /// CHECK:              Deoptimize loop:<<Loop:B\d+>>
-  //  CHECK-NOT:          Goto       loop:<<Loop>>
+  /// CHECK-NOT:          Goto       loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
@@ -710,8 +793,8 @@
         // making them a candidate for deoptimization based on constant indices.
         // Compiler should ensure the array loads are not subsequently hoisted
         // "above" the deoptimization "barrier" on the bounds.
-        a[0][i] = 1;
-        a[1][i] = 2;
+        a[1][i] = 1;
+        a[2][i] = 2;
         a[99][i] = 3;
       }
     }
@@ -839,6 +922,8 @@
     expectEquals(55, justRightDown1());
     expectEquals(55, justRightDown2());
     expectEquals(55, justRightDown3());
+
+    // Large bounds OOB.
     sResult = 0;
     try {
       justOOBUp();
@@ -890,6 +975,23 @@
     }
     expectEquals(1055, sResult);
 
+    // Triangular.
+    sResult = 0;
+    justRightTriangular1();
+    expectEquals(1, sResult);
+    if (HEAVY) {
+      sResult = 0;
+      justRightTriangular2();
+      expectEquals(1, sResult);
+    }
+    sResult = 0;
+    try {
+      justOOBTriangular();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1001, sResult);
+
     // Hidden OOB.
     sResult = 0;
     try {
@@ -912,6 +1014,15 @@
       sResult += 1000;
     }
     expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB3(-1);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(11, sResult);
+
+    // Expensive hidden OOB test.
     if (HEAVY) {
       sResult = 0;
       try {
@@ -920,7 +1031,16 @@
         sResult += 1000;
       }
       expectEquals(1002, sResult);
+      sResult = 0;
+      try {
+        hiddenOOB3(2147483647);  // OOB
+      } catch (ArrayIndexOutOfBoundsException e) {
+        sResult += 1000;
+      }
+      expectEquals(1011, sResult);
     }
+
+    // More hidden OOB.
     sResult = 0;
     try {
       hiddenInfiniteOOB();
@@ -966,6 +1086,9 @@
       expectEquals(i < 128 ? i : 0, a200[i]);
     }
 
+    // No hoisting after BCE.
+    expectEquals(110, doNotHoist(x));
+
     // Addition.
     {
       int[] e1 ={ 1, 2, 3, 4, 4, 4, 4, 3, 2, 1 };
@@ -1042,11 +1165,11 @@
     a = new int[100][10];
     expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
     for (int i = 0; i < 10; i++) {
-      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
-      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 1 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[2][i]);
       expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
     }
-    a = new int[2][10];
+    a = new int[3][10];
     sResult = 0;
     try {
       expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
@@ -1054,8 +1177,8 @@
       sResult = 1;
     }
     expectEquals(1, sResult);
-    expectEquals(a[0][1], 1);
-    expectEquals(a[1][1], 2);
+    expectEquals(a[1][1], 1);
+    expectEquals(a[2][1], 2);
 
     // Dynamic BCE combined with constant indices of all types.
     boolean[] x1 = { true };
diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt
new file mode 100644
index 0000000..07d99a3
--- /dev/null
+++ b/test/530-checker-loops3/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations, in particular loop-based dynamic BCE.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
new file mode 100644
index 0000000..6b5c657
--- /dev/null
+++ b/test/530-checker-loops3/src/Main.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations, in particular dynamic BCE. In all cases, the
+// bounds check on a[] is resolved statically, while the bounds checks on b[]
+// exercise various scenarios. In all cases, loop-based dynamic BCE is
+// preferable to dominator-based BCE, since it generates the test outside
+// the loop.
+//
+public class Main {
+
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneConstantIndex(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[1];
+    }
+  }
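
A hedged, hand-written analogue of what the two hoisted deopts check (illustrative only; the real compiler emits Deoptimize instructions that fall back to the interpreter rather than throwing):

class OneConstantIndexSketch {
  static void oneConstantIndexGuarded(int[] a, int[] b) {
    if (a.length > 0) {                                               // loop executes at least once
      if (b == null) throw new NullPointerException();                // the "null" deopt
      if (b.length <= 1) throw new ArrayIndexOutOfBoundsException(1); // the "bound" deopt
    }
    for (int i = 0; i < a.length; i++) {
      a[i] = b[1];   // loop body runs without per-iteration NullCheck/BoundsCheck
    }
  }
}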
+
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleConstantIndices(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[0] + b[1] + b[2];
+    }
+  }
+
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneInvariantIndex(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleInvariantIndices(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c-1] + b[c] + b[c+1];
+    }
+  }
+
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneUnitStride(int[] a, int[] b) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[i];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStrides(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // One redundant deopt is removed by the simplifier.
+    // TODO: range information could remove another.
+    for (int i = 1; i < a.length - 1; i++) {
+      a[i] = b[i-1] + b[i] + b[i+1];
+    }
+  }
+
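In the same hedged spirit (hand-written analogue, not compiler output), the hoisted tests must cover the entire index range used on `b` (from `0` for `b[i-1]` up to `a.length - 1` for `b[i+1]`), which is why more bound deopts appear than in the single-index cases:

class MultipleUnitStridesSketch {
  static void multipleUnitStridesGuarded(int[] a, int[] b) {
    if (a.length > 2) {                          // the loop executes at least once
      if (b == null) throw new NullPointerException();
      if (b.length < a.length) {                 // b[i+1] reaches index a.length - 1
        throw new ArrayIndexOutOfBoundsException(a.length - 1);
      }
    }
    for (int i = 1; i < a.length - 1; i++) {
      a[i] = b[i-1] + b[i] + b[i+1];             // no per-iteration checks on b
    }
  }
}
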
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStridesConditional(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // The two conditional references may be included, since they are in range.
+    // One redundant deopt is removed by the simplifier.
+    for (int i = 2; i < a.length - 2; i++) {
+      int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]);
+      a[i] = t;
+    }
+  }
+
+  /// CHECK-START: void Main.shifter(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shifter(int[] x) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    // Two redundant deopts are removed by the simplifier.
+    for (int i = 16; i < 80; i++) {
+      int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16];
+      x[i] = t << 1 | t >>> 31;
+    }
+  }
+
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void stencil(int[] array, int start, int end) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] a = new int[10];
+    int b[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int b1[] = { 100 };
+
+    oneConstantIndex(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneConstantIndex(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleConstantIndices(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleConstantIndices(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneInvariantIndex(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneInvariantIndex(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleInvariantIndices(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleInvariantIndices(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneUnitStride(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(i + 1, a[i]);
+    }
+    try {
+      oneUnitStride(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+      expectEquals(100, a[0]);
+    }
+
+    multipleUnitStrides(a, b);
+    for (int i = 1; i < a.length - 1; i++) {
+      expectEquals(3 * i + 3, a[i]);
+    }
+    try {
+      multipleUnitStrides(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleUnitStridesConditional(a, b);
+    for (int i = 2; i < a.length - 2; i++) {
+      int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
+      expectEquals(e, a[i]);
+    }
+    try {
+      multipleUnitStridesConditional(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 4d6ea06..89875d7 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -70,6 +70,10 @@
   }
 }
 
+interface Filter {
+  public boolean isValid(int i);
+}
+
 public class Main {
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
@@ -78,7 +82,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
@@ -124,7 +128,6 @@
   }
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
-  /// CHECK: NewInstance
   /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -137,7 +140,6 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
-  /// CHECK: NewInstance
   /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -149,9 +151,6 @@
 
   // A new allocation (even non-singleton) shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
-    // Do an allocation here to avoid the HLoadClass and HClinitCheck
-    // at the second allocation.
-    new TestClass();
     TestClass obj1 = TestClass.sTestClassObj;
     TestClass obj2 = new TestClass();  // Cannot alias with obj or obj1 which pre-exist.
     obj.next = obj2;  // Make obj2 a non-singleton.
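
A hedged, self-contained illustration of that aliasing argument (hypothetical `Node` type, not the TestClass used by this test): a freshly allocated object cannot be the same object as anything that existed before the allocation, so stores through the new reference cannot invalidate previously stored values.

class AliasSketch {
  static class Node { int i; Node next; }

  static int sketch(Node pre) {
    pre.i = 1;                 // store through a pre-existing reference
    Node fresh = new Node();   // fresh allocation: cannot alias 'pre'
    fresh.i = 2;               // therefore cannot overwrite pre.i
    pre.next = fresh;          // 'fresh' escapes, but is still a distinct object
    return pre.i;              // load-store elimination can return the stored 1 directly
  }
}
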
@@ -256,7 +255,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test8() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK: InvokeVirtual
   /// CHECK-NOT: NullCheck
@@ -414,7 +413,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test16() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
@@ -431,7 +430,7 @@
 
   /// CHECK-START: int Main.test17() load_store_elimination (after)
   /// CHECK: <<Const0:i\d+>> IntConstant 0
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK: Return [<<Const0>>]
@@ -527,12 +526,12 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test22() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK-NOT: InstanceFieldGet
@@ -673,7 +672,7 @@
   /// CHECK: Select
 
   // Test that HSelect creates alias.
-  public static int $noinline$testHSelect(boolean b) {
+  static int $noinline$testHSelect(boolean b) {
     if (sFlag) {
       throw new Error();
     }
@@ -686,19 +685,51 @@
     return obj2.i;
   }
 
-  public static void assertIntEquals(int result, int expected) {
+  static int sumWithFilter(int[] array, Filter f) {
+    int sum = 0;
+    for (int i = 0; i < array.length; i++) {
+      if (f.isValid(array[i])) {
+        sum += array[i];
+      }
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // A lambda-style allocation can be eliminated after inlining.
+  static int sumWithinRange(int[] array, final int low, final int high) {
+    Filter filter = new Filter() {
+      public boolean isValid(int i) {
+        return (i >= low) && (i <= high);
+      }
+    };
+    return sumWithFilter(array, filter);
+  }
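
A hedged, hand-written equivalent of what `sumWithinRange` reduces to after inlining plus load-store elimination (illustrative only): the anonymous Filter allocation and its field accesses disappear, leaving the captured bounds as plain values.

class SumWithinRangeSketch {
  static int sumWithinRangeInlined(int[] array, int low, int high) {
    int sum = 0;
    for (int i = 0; i < array.length; i++) {
      if (array[i] >= low && array[i] <= high) {  // inlined isValid; fields replaced by locals
        sum += array[i];
      }
    }
    return sum;   // no NewInstance / InstanceFieldSet / InstanceFieldGet remain
  }
}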
+
+  static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertFloatEquals(float result, float expected) {
+  static void assertFloatEquals(float result, float expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertDoubleEquals(double result, double expected) {
+  static void assertDoubleEquals(double result, double expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
@@ -746,6 +777,8 @@
     assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
     assertIntEquals($noinline$testHSelect(true), 0xdead);
+    int[] array = {2, 5, 9, -1, -3, 10, 8, 4};
+    assertIntEquals(sumWithinRange(array, 1, 5), 11);
   }
 
   static boolean sFlag;
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index be666e9..26475ae 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -16,9 +16,270 @@
 
 
 public class Main {
+  public static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertStringContains(String searchTerm, String result) {
+    if (result == null || !result.contains(searchTerm)) {
+      throw new Error("Search term: " + searchTerm + ", not found in: " + result);
+    }
+  }
+
   public static void main(String[] args) {
     stringEqualsSame();
     stringArgumentNotNull("Foo");
+
+    assertIntEquals(0, $opt$noinline$getStringLength(""));
+    assertIntEquals(3, $opt$noinline$getStringLength("abc"));
+    assertIntEquals(10, $opt$noinline$getStringLength("0123456789"));
+
+    assertBooleanEquals(true, $opt$noinline$isStringEmpty(""));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("abc"));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("0123456789"));
+
+    assertCharEquals('a', $opt$noinline$stringCharAt("a", 0));
+    assertCharEquals('a', $opt$noinline$stringCharAt("abc", 0));
+    assertCharEquals('b', $opt$noinline$stringCharAt("abc", 1));
+    assertCharEquals('c', $opt$noinline$stringCharAt("abc", 2));
+    assertCharEquals('7', $opt$noinline$stringCharAt("0123456789", 7));
+
+    try {
+      $opt$noinline$stringCharAt("abc", -1);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", 3);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", Integer.MAX_VALUE);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+
+    assertCharEquals('7', $opt$noinline$stringCharAtCatch("0123456789", 7));
+    assertCharEquals('\0', $opt$noinline$stringCharAtCatch("0123456789", 10));
+
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumChars("abc"));
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumLeadingChars("abcdef", 3));
+    try {
+      $opt$noinline$stringSumLeadingChars("abcdef", 7);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSumLeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
+    assertIntEquals('a' + 'b' + 'c' + 'd', $opt$noinline$stringSum4LeadingChars("abcdef"));
+    try {
+      $opt$noinline$stringSum4LeadingChars("abc");
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSum4LeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Length:i\d+>>   InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+
+  static public int $opt$noinline$getStringLength(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.length();
+  }
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  InvokeVirtual intrinsic:StringIsEmpty
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  Equal [<<Length>>,<<Const0>>]
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringIsEmpty
+
+  static public boolean $opt$noinline$isStringEmpty(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.isEmpty();
+  }
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAt(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    return s.charAt(pos);
+  }
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAtCatch(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    try {
+      return s.charAt(pos);
+    } catch (StringIndexOutOfBoundsException ignored) {
+      return '\0';
+    }
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) GVN (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-NOT:                    ArrayLength is_string_length:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck
+
+  static public int $opt$noinline$stringSumChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    int len = s.length();
+    for (int i = 0; i < len; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSumLeadingChars(String s, int n) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    for (int i = 0; i < n; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSum4LeadingChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = s.charAt(0) + s.charAt(1) + s.charAt(2) + s.charAt(3);
+    return sum;
   }
 
   /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
@@ -47,8 +308,28 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeVirtual {{.*\.equals.*}}
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
   /// CHECK-NOT:      test
+
+  /// CHECK-START-X86_64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      test
+
+  /// CHECK-START-ARM: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  // CompareAndBranchIfZero() may emit either CBZ or CMP+BEQ.
+  /// CHECK-NOT:      cbz
+  /// CHECK-NOT:      cmp {{r\d+}}, #0
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      cbz
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{w.*,}} {{w.*}}
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -56,12 +337,53 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeVirtual
-  /// CHECK:      test
-  /// CHECK:      jz/eq
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
   // Check that we don't try to compare the classes.
-  /// CHECK-NOT:  mov
-  /// CHECK:      cmp
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is very brittle as it depends on the order we emit instructions.
+  /// CHECK-START-X86_64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          {{cbz|cmp}}
+  // Check that we don't try to compare the classes.
+  // The disassembler currently emits the offset 0 explicitly, but we do not rely on that.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for the length comparison, but these may be emitted in either
+  // order, so the check is repeated twice.
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          cbz
+  // Check that we don't try to compare the classes.
+  // The disassembler currently does not emit the offset 0 explicitly, but we do not rely on that.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for the length comparison, but these may be emitted in either
+  // order, so the check is repeated twice.
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
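For orientation, the checks that the StringEquals intrinsic has to perform can be sketched at the Java level roughly as below (an approximation for illustration, not the ART implementation; stringEqualsSketch is invented for this note). The tests above verify that the argument null check is dropped when obj is known non-null, and that the class check and the redundant loads are dropped when the argument is statically a String:

    static boolean stringEqualsSketch(String receiver, Object anObject) {
      if (receiver == anObject) {
        return true;                                  // reference comparison (cmp rX, rY)
      }
      if (!(anObject instanceof String)) {
        return false;                                 // covers null and the class check
      }
      String other = (String) anObject;
      if (receiver.length() != other.length()) {
        return false;                                 // length comparison
      }
      for (int i = 0; i < receiver.length(); ++i) {
        if (receiver.charAt(i) != other.charAt(i)) {
          return false;                               // character comparison loop
        }
      }
      return true;
    }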
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 30ccc56..95a11ca 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -50,11 +50,11 @@
   }
 
   /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
-  /// CHECK-NOT:      test
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      test {{^[^\[].*}}, {{^[^\[].*}}
   /// CHECK-NOT:      call
   /// CHECK:          ReturnVoid
-  // Checks that the call is intrinsified and that there is no test instruction
+  // Checks that the call is intrinsified and that there is no register test instruction
   // when we know the source and destination are not null.
   public static void arraycopy() {
     Object[] obj = new Object[4];
@@ -65,7 +65,36 @@
     System.arraycopy(obj, 1, obj, 0, 1);
   }
 
+  // Test case for having enough registers on x86 for the arraycopy intrinsic.
+  /// CHECK-START-X86: void Main.arraycopy(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          ReturnVoid
   public static void arraycopy(Object[] obj, int pos) {
     System.arraycopy(obj, pos, obj, 0, obj.length);
   }
+
+  // Test case for having enough registers on x86 for the arraycopy intrinsic
+  // when an input is passed twice.
+  /// CHECK-START-X86: int Main.arraycopy2(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy2(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, pos - 1, obj.length);
+    return pos;
+  }
+
+  // Test case for not having enough registers on x86. The arraycopy intrinsic
+  // will ask for the length to be on the stack and load it from there.
+  /// CHECK-START-X86: int Main.arraycopy3(java.lang.Object[], java.lang.Object[], int, int, int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK:          mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy3(Object[] obj1, Object[] obj2, int input1, int input3, int input4) {
+    System.arraycopy(obj1, input1, obj2, input3, input4);
+    System.out.println(obj1);
+    System.out.println(obj2);
+    return input1 + input3 + input4;
+  }
 }
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index f791adf..f6713a2 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -473,7 +473,7 @@
   }
 
   /**
-   * Test that the `-1` constant is not synthesized in a register and that we
+   * ARM/ARM64: Test that the `-1` constant is not synthesized in a register and that we
    * instead simply switch between `add` and `sub` instructions with the
    * constant embedded.
    * We need two uses (or more) of the constant because the compiler always
@@ -491,10 +491,137 @@
   /// CHECK:                        sub x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:                        add x{{\d+}}, x{{\d+}}, #0x1
 
+  /// CHECK-START-ARM: long Main.addM1(long) register (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-NOT:                    ParallelMove
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+
+  /// CHECK-START-ARM: long Main.addM1(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #-1
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, #1
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #0
+
   public static long addM1(long arg) {
     return (arg + (-1)) | (arg - (-1));
   }
 
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for add-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.addLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Add [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Add [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Add [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Add [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Add [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   adds{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   adc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long addLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg + 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg + 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg + 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg + 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg + 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg + 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg + 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
+  /**
+   * ARM: Test that some long constants are not synthesized in a register for sub-long.
+   * Also test some negative cases where we do synthesize constants in registers.
+   */
+
+  /// CHECK-START-ARM: long Main.subLongConstants(long) disassembly (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK-DAG: <<ConstA:j\d+>>    LongConstant 4486007727657233
+  /// CHECK-DAG: <<ConstB:j\d+>>    LongConstant 4486011735248896
+  /// CHECK-DAG: <<ConstC:j\d+>>    LongConstant -1071856711330889728
+  /// CHECK-DAG: <<ConstD:j\d+>>    LongConstant 17587891077120
+  /// CHECK-DAG: <<ConstE:j\d+>>    LongConstant -8808977924096
+  /// CHECK-DAG: <<ConstF:j\d+>>    LongConstant 17587891077121
+  /// CHECK-DAG: <<ConstG:j\d+>>    LongConstant 4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstA>>]
+  /// CHECK-NEXT:                   subs r{{\d+}}, r{{\d+}}, #286331153
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstB>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK-NEXT:                   sbc r{{\d+}}, r{{\d+}}, #1044480
+  /// CHECK:                        Sub [<<Arg>>,<<ConstC>>]
+  /// CHECK-NEXT:                   adds r{{\d+}}, r{{\d+}}, #16711680
+  /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #249561088
+  /// CHECK:                        Sub [<<Arg>>,<<ConstD>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        subw r{{\d+}}, r{{\d+}}, #4095
+  /// CHECK:                        Sub [<<Arg>>,<<ConstE>>]
+  // There may or may not be a MOV here.
+  /// CHECK:                        addw r{{\d+}}, r{{\d+}}, #2051
+  /// CHECK:                        Sub [<<Arg>>,<<ConstF>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK:                        Sub [<<Arg>>,<<ConstG>>]
+  /// CHECK-NEXT:                   subs{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  /// CHECK-NEXT:                   sbc{{(\.w)?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+
+  public static long subLongConstants(long arg) {
+    return
+        // Modified immediates.
+        (arg - 0x000ff00011111111L) ^  // 4486007727657233
+        // Modified immediates high and -low.
+        (arg - 0x000ff000fff01000L) ^  // 4486011735248896
+        // Modified immediates ~high and -low.
+        (arg - 0xf11fffffff010000L) ^  // -1071856711330889728
+        // Low word 0 (no carry), high is imm12.
+        (arg - 0x00000fff00000000L) ^  // 17587891077120
+        // Low word 0 (no carry), -high is imm12.
+        (arg - 0xfffff7fd00000000L) ^  // -8808977924096
+        // Cannot embed imm12 in ADC/SBC for high word.
+        (arg - 0x00000fff00000001L) ^  // 17587891077121
+        // Cannot embed imm12 in ADDS/SUBS for low word (need to set flags).
+        (arg - 0x0000000000000fffL) ^  // 4095
+        arg;
+  }
+
   public static void main(String[] args) {
     int arg = 0x87654321;
     assertIntEquals(and255(arg), 0x21);
@@ -522,7 +649,7 @@
     assertLongEquals(xor0xfffffff00000000f(longArg), 0xedcba9888765432eL);
     assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
 
-    assertLongEquals(14, addM1(7));
+    assertLongEquals(14L, addM1(7));
 
     assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
     assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
@@ -562,5 +689,30 @@
     assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
     assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
     assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
+
+    // Test -1, 0, +1 and arbitrary constants just before and after overflow
+    // on the low word in subexpressions of addLongConstants()/subLongConstants(),
+    // so that we check that the carry out of the low word is propagated correctly
+    // to the high word. For example:
+    //    0x111eeeeeeee+0x000ff00011111111 = 0x000ff111ffffffff (carry=0),
+    //    0x111eeeeeeef+0x000ff00011111111 = 0x000ff11200000000 (carry=1).
+    assertLongEquals(0xf11ff7fdee1e1111L, addLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xee0080211e00eefL, addLongConstants(0x0L));
+    assertLongEquals(0xee0080211e01111L, addLongConstants(0x1L));
+    assertLongEquals(0xedff81c12201113L, addLongConstants(0x111eeeeeeeeL));
+    assertLongEquals(0xedff81feddfeef1L, addLongConstants(0x111eeeeeeefL));
+    assertLongEquals(0xedff83e11c1f111L, addLongConstants(0x222000fefffL));
+    assertLongEquals(0xedff83fee3e0eefL, addLongConstants(0x222000ff000L));
+    assertLongEquals(0xedff805edfe1111L, addLongConstants(0x33300feffffL));
+    assertLongEquals(0xedff80412000eefL, addLongConstants(0x33300ff0000L));
+    assertLongEquals(0xee0080211e00eefL, subLongConstants(0xffffffffffffffffL));
+    assertLongEquals(0xf11ff7fdee1e1111L, subLongConstants(0x0L));
+    assertLongEquals(0xf11ff7fc11e1eef3L, subLongConstants(0x1L));
+    assertLongEquals(0xee0080412201113L, subLongConstants(0x44411111111L));
+    assertLongEquals(0xee0080412201111L, subLongConstants(0x44411111112L));
+    assertLongEquals(0xee0080e11c1f111L, subLongConstants(0x555fff01000L));
+    assertLongEquals(0xee0080e11c1eef3L, subLongConstants(0x555fff01001L));
+    assertLongEquals(0xee0080dedfe1111L, subLongConstants(0x666ff010000L));
+    assertLongEquals(0xee0080dedffeef3L, subLongConstants(0x666ff010001L));
   }
 }
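To make the carry examples in main() concrete, the adds/adc pairs checked above correspond to adding the two 32-bit halves separately and propagating the carry. A small self-contained sketch (illustration only; CarryDemo and addViaWords are invented for this note):

    public class CarryDemo {
      static long addViaWords(long a, long b) {
        long lo = (a & 0xffffffffL) + (b & 0xffffffffL);  // adds: low words
        long carry = lo >>> 32;                           // carry out of the low word
        long hi = (a >>> 32) + (b >>> 32) + carry;        // adc: high words plus carry
        return (hi << 32) | (lo & 0xffffffffL);
      }

      public static void main(String[] args) {
        long c = 0x000ff00011111111L;
        // No carry out of the low word: 0x111eeeeeeee + c == 0x000ff111ffffffff.
        System.out.println(Long.toHexString(addViaWords(0x111eeeeeeeeL, c)));
        // Carry out of the low word: 0x111eeeeeeef + c == 0x000ff11200000000.
        System.out.println(Long.toHexString(addViaWords(0x111eeeeeeefL, c)));
      }
    }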
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index 17b11db..19b7fb7 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -48,7 +48,7 @@
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination$initial (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
diff --git a/test/542-unresolved-access-check/src/Main.java b/test/542-unresolved-access-check/src/Main.java
index 2bdf47f..62bfea1 100644
--- a/test/542-unresolved-access-check/src/Main.java
+++ b/test/542-unresolved-access-check/src/Main.java
@@ -58,7 +58,7 @@
             "loadClassBinaryName", String.class, ClassLoader.class, List.class);
 
         if (dex != null) {
-          Class clazz = (Class)method.invoke(dex, className, this, null);
+          Class<?> clazz = (Class<?>)method.invoke(dex, className, this, null);
           if (clazz != null) {
             return clazz;
           }
@@ -72,7 +72,7 @@
 public class Main {
     public static void main(String[] args) throws Exception {
       MyClassLoader o = new MyClassLoader();
-      Class foo = o.loadClass("LoadedByMyClassLoader");
+      Class<?> foo = o.loadClass("LoadedByMyClassLoader");
       Method m = foo.getDeclaredMethod("main");
       m.invoke(null);
     }
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 9f9916d..5557c7b 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -26,18 +26,18 @@
 # Test a case when one entering TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
@@ -71,18 +71,18 @@
 # Test a case when one exiting TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -117,21 +117,21 @@
 # Test that a catch block remains live and consistent if some of try blocks
 # throwing into it are removed.
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -203,7 +203,7 @@
 
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
@@ -220,7 +220,7 @@
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
 ## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
@@ -277,7 +277,7 @@
 # Test that DCE does not remove catch phi uses of instructions defined outside
 # dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
@@ -287,7 +287,7 @@
 ## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
index 6e73d0d..19587e7 100644
--- a/test/543-checker-dce-trycatch/src/Main.java
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -35,10 +35,10 @@
   // where TryBoundary still has exception handler successors after having removed
   // some already.
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
   /// CHECK-NOT: TryBoundary
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
   /// CHECK: begin_block
   /// CHECK: begin_block
   /// CHECK: begin_block
diff --git a/test/545-tracing-and-jit/src/Main.java b/test/545-tracing-and-jit/src/Main.java
index a2d51d5..f365c6e 100644
--- a/test/545-tracing-and-jit/src/Main.java
+++ b/test/545-tracing-and-jit/src/Main.java
@@ -226,7 +226,7 @@
         private static final Method getMethodTracingModeMethod;
         static {
             try {
-                Class c = Class.forName("dalvik.system.VMDebug");
+                Class<?> c = Class.forName("dalvik.system.VMDebug");
                 startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
                         Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
                 stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
diff --git a/test/548-checker-inlining-and-dce/src/Main.java b/test/548-checker-inlining-and-dce/src/Main.java
index 38fdcc0..bf64c3b 100644
--- a/test/548-checker-inlining-and-dce/src/Main.java
+++ b/test/548-checker-inlining-and-dce/src/Main.java
@@ -16,17 +16,19 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   private void inlinedForNull(Iterable it) {
     if (it != null) {
-      // We're not inlining invoke-interface at the moment.
-      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
 
   private void inlinedForFalse(boolean value, Iterable it) {
     if (value) {
-      // We're not inlining invoke-interface at the moment.
-      it.iterator();
+      // We're not inlining throw at the moment.
+      if (doThrow) { throw new Error(""); }
     }
   }
 
diff --git a/test/552-checker-primitive-typeprop/src/Main.java b/test/552-checker-primitive-typeprop/src/Main.java
index fe2343e..1296800 100644
--- a/test/552-checker-primitive-typeprop/src/Main.java
+++ b/test/552-checker-primitive-typeprop/src/Main.java
@@ -29,15 +29,15 @@
 
   public static void main(String[] args) throws Exception {
     Class<?> c = Class.forName("SsaBuilder");
-    Method m = c.getMethod("environmentPhi", new Class[] { boolean.class, int[].class });
+    Method m = c.getMethod("environmentPhi", boolean.class, int[].class);
 
     int[] array = new int[3];
     int result;
 
-    result = (Integer) m.invoke(null, new Object[] { true, array } );
+    result = (Integer) m.invoke(null, true, array);
     assertEquals(2, result);
 
-    result = (Integer) m.invoke(null, new Object[] { false, array } );
+    result = (Integer) m.invoke(null, false, array);
     assertEquals(0, result);
   }
 }
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 3d985bf..2232ff4 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -28,6 +28,12 @@
     }
   }
 
+  public static void assertClassEquals(Class<?> expected, Class<?> result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static boolean doThrow = false;
 
   private static int $noinline$foo(int x) {
@@ -45,6 +51,10 @@
   /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -56,6 +66,10 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NOT:            ArmDexCacheArraysBase
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
   /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -77,6 +91,11 @@
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -94,6 +113,14 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NEXT:           If
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -104,7 +131,7 @@
 
   public static int testDiamond(boolean negate, int x) {
     // These calls should use PC-relative dex cache array loads to retrieve the target method.
-    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If.
     if (negate) {
       return $noinline$foo(-x);
     } else {
@@ -148,8 +175,26 @@
   /// CHECK:                begin_block
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   public static int testLoop(int[] array, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
     for (int i : array) {
       x += $noinline$foo(i);
     }
@@ -176,8 +221,18 @@
   /// CHECK-NEXT:           ArmDexCacheArraysBase
   /// CHECK-NEXT:           Goto
 
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
   public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop
     // but not outside the if.
     if (array != null) {
       for (int i : array) {
@@ -214,6 +269,11 @@
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
   /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -244,6 +304,13 @@
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:DexCachePcRelative
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -251,6 +318,78 @@
     return "non-boot-image-string";
   }
 
+  /// CHECK-START: java.lang.Class Main.$noinline$getStringClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:java.lang.String
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  public static Class<?> $noinline$getStringClass() {
+    // Prevent inlining to avoid the class comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // String class is known to be in the boot image.
+    return String.class;
+  }
+
+  /// CHECK-START: java.lang.Class Main.$noinline$getOtherClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (after)
+  /// CHECK-DAG:            X86ComputeBaseMethodAddress
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_arm (after)
+  /// CHECK-DAG:            ArmDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  public static Class<?> $noinline$getOtherClass() {
+    // Prevent inlining to avoid the class comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // Other class is not in the boot image.
+    return Other.class;
+  }
+
   public static void main(String[] args) {
     assertIntEquals(1, testSimple(1));
     assertIntEquals(1, testDiamond(false, 1));
@@ -262,5 +401,10 @@
     assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
     assertStringEquals("", $noinline$getBootImageString());
     assertStringEquals("non-boot-image-string", $noinline$getNonBootImageString());
+    assertClassEquals(String.class, $noinline$getStringClass());
+    assertClassEquals(Other.class, $noinline$getOtherClass());
   }
 }
+
+class Other {
+}
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
index 6d8b74d..0e3d145 100644
--- a/test/557-checker-instruction-simplifier-ror/src/Main.java
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -175,7 +175,7 @@
 
   //  (i >>> #distance) | (i << #-distance)
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (before)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
@@ -184,13 +184,13 @@
   /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
   /// CHECK:                                Return [<<Or>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
   /// CHECK:                                Return [<<Ror>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:      UShr
   /// CHECK-NOT:      Shl
   public static int ror_int_constant_c_negc(int value) {
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 7ce60a3..5d4aa56 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -28,7 +28,7 @@
 #  exit    \-    \
 #           other_loop_entry
 #
-## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-START: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
 ## CHECK: irreducible:true
 .method public static simpleLoop(I)I
    .registers 2
@@ -65,7 +65,7 @@
 #                other_loop_entry
 #              set 30 in p1:myField
 #
-## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination (after)
+## CHECK-START: int IrreducibleLoop.lse(int, Main) dead_code_elimination$initial (after)
 ## CHECK: irreducible:true
 #
 ## CHECK-START: int IrreducibleLoop.lse(int, Main) load_store_elimination (after)
@@ -101,10 +101,10 @@
 #  exit    \-    \
 #           other_loop_entry
 #
-## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (before)
 ## CHECK: irreducible:true
 
-## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
+## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination$initial (after)
 ## CHECK: irreducible:true
 .method public static dce(I)I
    .registers 3
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
index b82ed92..75344f7 100644
--- a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -16,7 +16,7 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
index e426b75..811c280 100644
--- a/test/565-checker-doublenegbitwise/src/Main.java
+++ b/test/565-checker-doublenegbitwise/src/Main.java
@@ -70,7 +70,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -80,18 +80,18 @@
   /// CHECK:       <<And:i\d+>>         And [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<And>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<Or:i\d+>>          Or [<<Cond2>>,<<Cond1>>]
   /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<Or>>]
   /// CHECK:                            Return [<<BooleanNot>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:                            BooleanNot
   /// CHECK-NOT:                        BooleanNot
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        And
 
   public static boolean $opt$noinline$booleanAndToOr(boolean a, boolean b) {
@@ -138,7 +138,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -148,18 +148,18 @@
   /// CHECK:       <<Or:i\d+>>          Or [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<Or>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<And:i\d+>>         And [<<Cond2>>,<<Cond1>>]
   /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<And>>]
   /// CHECK:                            Return [<<BooleanNot>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:                            BooleanNot
   /// CHECK-NOT:                        BooleanNot
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        Or
 
   public static boolean $opt$noinline$booleanOrToAnd(boolean a, boolean b) {
@@ -246,7 +246,7 @@
    * same pass.
    */
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (before)
   /// CHECK:       <<P1:z\d+>>          ParameterValue
   /// CHECK:       <<P2:z\d+>>          ParameterValue
   /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
@@ -256,13 +256,13 @@
   /// CHECK:       <<Xor:i\d+>>         Xor [<<Select2>>,<<Select1>>]
   /// CHECK:                            Return [<<Xor>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:       <<Cond1:z\d+>>       ParameterValue
   /// CHECK:       <<Cond2:z\d+>>       ParameterValue
   /// CHECK:       <<Xor:i\d+>>         Xor [<<Cond2>>,<<Cond1>>]
   /// CHECK:                            Return [<<Xor>>]
 
-  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                        BooleanNot
 
   public static boolean $opt$noinline$booleanNotXorToXor(boolean a, boolean b) {
diff --git a/test/565-checker-rotate/src/Main.java b/test/565-checker-rotate/src/Main.java
index aadb597..eb0e868 100644
--- a/test/565-checker-rotate/src/Main.java
+++ b/test/565-checker-rotate/src/Main.java
@@ -52,14 +52,14 @@
   /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) select_generator (after)
   /// CHECK-NOT:                      Phi
 
-  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
   /// CHECK:         <<ArgDist:i\d+>> ParameterValue
   /// CHECK-DAG:     <<NegDist:i\d+>> Neg [<<ArgDist>>]
   /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<NegDist>>]
   /// CHECK-DAG:                      Return [<<Result>>]
 
-  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateLeftBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                      Select
 
   private static int rotateLeftBoolean(boolean value, int distance) {
@@ -206,13 +206,13 @@
   /// CHECK-START: int Main.rotateRightBoolean(boolean, int) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgVal:z\d+>>  ParameterValue
   /// CHECK:         <<ArgDist:i\d+>> ParameterValue
   /// CHECK-DAG:     <<Result:i\d+>>  Ror [<<ArgVal>>,<<ArgDist>>]
   /// CHECK-DAG:                      Return [<<Result>>]
 
-  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.rotateRightBoolean(boolean, int) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int rotateRightBoolean(boolean value, int distance) {
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
index 5f2cf3d..7fc9e84 100644
--- a/test/566-checker-signum/src/Main.java
+++ b/test/566-checker-signum/src/Main.java
@@ -45,13 +45,13 @@
   /// CHECK-START: int Main.signBoolean(boolean) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>    ParameterValue
   /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Result:i\d+>> Compare [<<Arg>>,<<Zero>>]
   /// CHECK-DAG:                     Return [<<Result>>]
 
-  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.signBoolean(boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int signBoolean(boolean x) {
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
index c1651e5..89293cc 100644
--- a/test/566-polymorphic-inlining/polymorphic_inline.cc
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -15,8 +15,10 @@
  */
 
 #include "art_method.h"
+#include "base/enums.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -28,7 +30,7 @@
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
   jit::Jit* jit = Runtime::Current()->GetJit();
   jit::JitCodeCache* code_cache = jit->GetCodeCache();
-  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, sizeof(void*));
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, kRuntimePointerSize);
 
   OatQuickMethodHeader* header = nullptr;
   // Infinite loop... Test harness will have its own timeout.
@@ -37,8 +39,10 @@
     if (code_cache->ContainsPc(header->GetCode())) {
       break;
     } else {
-      // sleep one second to give time to the JIT compiler.
-      sleep(1);
+      // Sleep to yield to the compiler thread.
+      usleep(1000);
+      // Will either ensure it's compiled or do the compilation itself.
+      jit->CompileMethod(method, soa.Self(), /* osr */ false);
     }
   }
 
@@ -47,7 +51,25 @@
   CHECK(info.HasInlineInfo(encoding));
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_ensureJittedAndPolymorphicInline(JNIEnv*, jclass cls) {
+static void allocate_profiling_info(jclass cls, const char* method_name) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(method_name, kRuntimePointerSize);
+  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureProfilingInfo566(JNIEnv*, jclass cls) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  allocate_profiling_info(cls, "testInvokeVirtual");
+  allocate_profiling_info(cls, "testInvokeInterface");
+  allocate_profiling_info(cls, "$noinline$testInlineToSameTarget");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJittedAndPolymorphicInline566(JNIEnv*, jclass cls) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit == nullptr) {
     return;
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index 411264d..793b85f 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -15,9 +15,9 @@
  */
 
 interface Itf {
-  public Class sameInvokeInterface();
-  public Class sameInvokeInterface2();
-  public Class sameInvokeInterface3();
+  public Class<?> sameInvokeInterface();
+  public Class<?> sameInvokeInterface2();
+  public Class<?> sameInvokeInterface3();
 }
 
 public class Main implements Itf {
@@ -41,6 +41,9 @@
     itfs[1] = mains[1] = new Subclass();
     itfs[2] = mains[2] = new OtherSubclass();
 
+    // Create the profiling info objects eagerly to make sure they are filled.
+    ensureProfilingInfo566();
+
     // Make testInvokeVirtual and testInvokeInterface hot to get them jitted.
     // We pass Main and Subclass to get polymorphic inlining based on calling
     // the same method.
@@ -55,7 +58,7 @@
       $noinline$testInlineToSameTarget(mains[1]);
     }
 
-    ensureJittedAndPolymorphicInline();
+    ensureJittedAndPolymorphicInline566();
 
     // At this point, the JIT should have compiled both methods, and inline
     // sameInvokeVirtual and sameInvokeInterface.
@@ -78,31 +81,31 @@
     assertEquals(20001, counter);
   }
 
-  public Class sameInvokeVirtual() {
-    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
+  public Class<?> sameInvokeVirtual() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Main.class;
   }
 
-  public Class sameInvokeInterface() {
-    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
-    return Itf.class;
-  }
-
-  public Class sameInvokeInterface2() {
+  public Class<?> sameInvokeInterface() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Itf.class;
   }
 
-  public Class sameInvokeInterface3() {
+  public Class<?> sameInvokeInterface2() {
     field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     return Itf.class;
   }
 
-  public static Class testInvokeInterface(Itf i) {
+  public Class<?> sameInvokeInterface3() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public static Class<?> testInvokeInterface(Itf i) {
     return i.sameInvokeInterface();
   }
 
-  public static Class testInvokeInterface2(Itf i) {
+  public static Class<?> testInvokeInterface2(Itf i) {
     // Make three interface calls that will do a ClassTableGet to ensure bogus code
     // generation of ClassTableGet will crash.
     i.sameInvokeInterface();
@@ -110,7 +113,7 @@
     return i.sameInvokeInterface3();
   }
 
-  public static Class testInvokeVirtual(Main m) {
+  public static Class<?> testInvokeVirtual(Main m) {
     return m.sameInvokeVirtual();
   }
 
@@ -121,9 +124,11 @@
 
   public Object field = new Object();
 
-  public static native void ensureJittedAndPolymorphicInline();
+  public static native void ensureJittedAndPolymorphicInline566();
+  public static native void ensureProfilingInfo566();
 
   public void increment() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
     counter++;
   }
   public static int counter = 0;
@@ -134,18 +139,18 @@
 }
 
 class OtherSubclass extends Main {
-  public Class sameInvokeVirtual() {
+  public Class<?> sameInvokeVirtual() {
     return OtherSubclass.class;
   }
 
-  public Class sameInvokeInterface() {
+  public Class<?> sameInvokeInterface() {
     return OtherSubclass.class;
   }
 
-  public Class sameInvokeInterface2() {
+  public Class<?> sameInvokeInterface2() {
     return null;
   }
-  public Class sameInvokeInterface3() {
+  public Class<?> sameInvokeInterface3() {
     return null;
   }
 }
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
index 8587950..a05bb60 100644
--- a/test/567-checker-compare/src/Main.java
+++ b/test/567-checker-compare/src/Main.java
@@ -75,13 +75,13 @@
   /// CHECK-START: int Main.compareBooleans(boolean, boolean) select_generator (after)
   /// CHECK-NOT:                     Phi
 
-  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<ArgX:z\d+>>   ParameterValue
   /// CHECK:         <<ArgY:z\d+>>   ParameterValue
   /// CHECK-DAG:     <<Result:i\d+>> Compare [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                     Return [<<Result>>]
 
-  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.compareBooleans(boolean, boolean) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:                     Select
 
   private static int compareBooleans(boolean x, boolean y) {
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 2fa5800..cf413ba 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -136,7 +136,7 @@
     if (m_name.compare(method_name_) == 0) {
       while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
         // Sleep to yield to the compiler thread.
-        sleep(0);
+        usleep(1000);
         // Will either ensure it's compiled or do the compilation itself.
         jit->CompileMethod(m, Thread::Current(), /* osr */ true);
       }
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
index 869c7c3..6592b7b 100644
--- a/test/570-checker-osr/smali/Osr.smali
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -19,7 +19,7 @@
 # Check that blocks only having nops are not merged when they are loop headers.
 # This ensures we can do on-stack replacement for branches to those nop blocks.
 
-## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination$final (after)
 ## CHECK-DAG:                     SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
 ## CHECK-DAG:                     SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
 .method public static simpleLoop(II)I
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 15c232d..8af3894 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -129,7 +129,7 @@
     DeoptimizationController.startDeoptimization();
   }
 
-  public static Class $noinline$inlineCache(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache")) {
@@ -159,7 +159,7 @@
     return other.returnClass();
   }
 
-  public static Class $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache2")) {
@@ -188,7 +188,7 @@
     return (other == null) ? null : other.returnClass();
   }
 
-  public static Class $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
+  public static Class<?> $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
     if (!isInInterpreter("$noinline$inlineCache3")) {
@@ -229,7 +229,7 @@
     return null;
   }
 
-  public Class returnClass() {
+  public Class<?> returnClass() {
     return Main.class;
   }
 
@@ -305,7 +305,7 @@
 }
 
 class SubMain extends Main {
-  public Class returnClass() {
+  public Class<?> returnClass() {
     return SubMain.class;
   }
 
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 59741d6..e0a76ca 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -16,6 +16,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
@@ -35,6 +37,10 @@
   /// CHECK:                          cmovnz/ne
 
   public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : y;
   }
 
@@ -57,6 +63,10 @@
   /// CHECK:                          cmovnz/ne
 
   public static int BoolCond_IntVarCst(boolean cond, int x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : 1;
   }
 
@@ -79,6 +89,10 @@
   /// CHECK:                          cmovnz/ne
 
   public static int BoolCond_IntCstVar(boolean cond, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? 1 : y;
   }
 
@@ -102,6 +116,10 @@
   /// CHECK-NEXT:                     cmovnz/ne
 
   public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : y;
   }
 
@@ -125,6 +143,10 @@
   /// CHECK-NEXT:                     cmovnz/ne
 
   public static long BoolCond_LongVarCst(boolean cond, long x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : 1L;
   }
 
@@ -148,6 +170,10 @@
   /// CHECK-NEXT:                     cmovnz/ne
 
   public static long BoolCond_LongCstVar(boolean cond, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? 1L : y;
   }
 
@@ -160,6 +186,10 @@
   /// CHECK-NEXT:            fcsel ne
 
   public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : y;
   }
 
@@ -172,6 +202,10 @@
   /// CHECK-NEXT:            fcsel ne
 
   public static float BoolCond_FloatVarCst(boolean cond, float x) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? x : 1.0f;
   }
 
@@ -184,6 +218,10 @@
   /// CHECK-NEXT:            fcsel ne
 
   public static float BoolCond_FloatCstVar(boolean cond, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return cond ? 1.0f : y;
   }
 
@@ -207,6 +245,10 @@
   /// CHECK:                          cmovle/ng
 
   public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a > b ? x : y;
   }
 
@@ -233,6 +275,10 @@
   /// CHECK:                          cmovle/ng
 
   public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
   }
@@ -258,6 +304,10 @@
   /// CHECK-NEXT:                     cmovle/ng
 
   public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a > b ? x : y;
   }
 
@@ -291,6 +341,10 @@
   /// CHECK-NEXT:                     cmovnz/ne
 
   public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     long result = (a > b ? x : y);
     return result + (a > b ? 0L : 1L);
   }
@@ -310,6 +364,10 @@
   /// CHECK:                          cmovle/ngq
 
   public static long LongNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a > b ? x : y;
   }
 
@@ -334,6 +392,10 @@
   /// CHECK:                          cmovnz/neq
 
   public static long LongMatCond_LongVarVar(long a, long b, long x, long y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     long result = (a > b ? x : y);
     return result + (a > b ? 0L : 1L);
   }
@@ -349,6 +411,10 @@
   /// CHECK-NEXT:            csel le
 
   public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a > b ? x : y;
   }
 
@@ -363,6 +429,10 @@
   /// CHECK-NEXT:            csel hs
 
   public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a < b ? x : y;
   }
 
@@ -377,6 +447,10 @@
   /// CHECK-NEXT:            fcsel hs
 
   public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     return a < b ? x : y;
   }
 
@@ -393,6 +467,10 @@
   /// CHECK-NEXT:            csel le
 
   public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
   }
@@ -410,6 +488,10 @@
   /// CHECK-NEXT:            csel hs
 
   public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     int result = (a < b ? x : y);
     return result + (a < b ? 0 : 1);
   }
@@ -427,10 +509,70 @@
   /// CHECK-NEXT:            fcsel hs
 
   public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
     float result = (a < b ? x : y);
     return result + (a < b ? 0 : 1);
   }
 
+  /// CHECK-START: int Main.BoolCond_0_m1(boolean) register (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmp {{w\d+}}, #0x0 (0)
+  /// CHECK-NEXT:                     csetm {{w\d+}}, eq
+
+  /// CHECK-START-X86_64: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_0_m1(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_0_m1(boolean cond) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? 0 : -1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_m1_0(boolean) register (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-ARM64: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmp {{w\d+}}, #0x0 (0)
+  /// CHECK-NEXT:                     csetm {{w\d+}}, ne
+
+  /// CHECK-START-X86_64: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  /// CHECK-START-X86: int Main.BoolCond_m1_0(boolean) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_m1_0(boolean cond) {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return cond ? -1 : 0;
+  }
+
   public static void assertEqual(int expected, int actual) {
     if (expected != actual) {
       throw new Error("Assertion failed: " + expected + " != " + actual);
@@ -499,5 +641,10 @@
     assertEqual(8, FloatGtMatCond_FloatVarVar(3, 2, 5, 7));
     assertEqual(8, FloatGtMatCond_FloatVarVar(Float.NaN, 2, 5, 7));
     assertEqual(8, FloatGtMatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(0, BoolCond_0_m1(true));
+    assertEqual(-1, BoolCond_0_m1(false));
+    assertEqual(-1, BoolCond_m1_0(true));
+    assertEqual(0, BoolCond_m1_0(false));
   }
 }
diff --git a/test/576-polymorphic-inlining/src/Main.java b/test/576-polymorphic-inlining/src/Main.java
index d8d09af..5763d89 100644
--- a/test/576-polymorphic-inlining/src/Main.java
+++ b/test/576-polymorphic-inlining/src/Main.java
@@ -65,11 +65,11 @@
   public void willOnlyInlineForMainVoid() {
   }
 
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return Main.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return Main.class;
   }
   public static boolean doThrow;
@@ -83,21 +83,21 @@
   public void willInlineVoid() {
   }
 
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return SubMain.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return SubMain.class;
   }
 }
 
 class SubSubMain extends SubMain {
-  public Class willInlineWithReturnValue() {
+  public Class<?> willInlineWithReturnValue() {
     return SubSubMain.class;
   }
 
-  public Class willOnlyInlineForMainWithReturnValue() {
+  public Class<?> willOnlyInlineForMainWithReturnValue() {
     return SubSubMain.class;
   }
 }
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
index 0cd85b5..ed7a625 100644
--- a/test/577-profile-foreign-dex/src/Main.java
+++ b/test/577-profile-foreign-dex/src/Main.java
@@ -111,11 +111,11 @@
   }
 
   private static void loadDexFile(String dexFile) throws Exception {
-    Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
     if (pathClassLoader == null) {
         throw new RuntimeException("Couldn't find path class loader class");
     }
-    Constructor constructor =
+    Constructor<?> constructor =
         pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
     constructor.newInstance(
             dexFile, ClassLoader.getSystemClassLoader());
@@ -125,7 +125,7 @@
     private static final Method registerAppInfoMethod;
     static {
       try {
-        Class c = Class.forName("dalvik.system.VMRuntime");
+        Class<?> c = Class.forName("dalvik.system.VMRuntime");
         registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
             String.class, String.class, String[].class, String.class);
       } catch (Exception e) {
diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java
index 9e248ef..83bc55c 100644
--- a/test/580-checker-round/src/Main.java
+++ b/test/580-checker-round/src/Main.java
@@ -36,7 +36,8 @@
     expectEquals32(-2, round32(-1.51f));
     expectEquals32(-1, round32(-1.2f));
     expectEquals32(-1, round32(-1.0f));
-    expectEquals32(-1, round32(-0.51f));
+    expectEquals32(-1, round32(-0.5000001f));
+    expectEquals32(0, round32(-0.5f));
     expectEquals32(0, round32(-0.2f));
     expectEquals32(0, round32(-0.0f));
     expectEquals32(0, round32(+0.0f));
@@ -47,11 +48,23 @@
     expectEquals32(2, round32(+1.5f));
     expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY));
 
+    // Near minint.
+    expectEquals32(-2147483648, round32(Math.nextAfter(-2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(-2147483648, round32(-2147483648.0f));
+    expectEquals32(-2147483520, round32(Math.nextAfter(-2147483648.0f, Float.POSITIVE_INFINITY)));
+
+    // Near maxint.
+    expectEquals32(2147483520, round32(Math.nextAfter(2147483648.0f, Float.NEGATIVE_INFINITY)));
+    expectEquals32(2147483647, round32(2147483648.0f));
+    expectEquals32(2147483647, round32(Math.nextAfter(2147483648.0f, Float.POSITIVE_INFINITY)));
+
     // Some others.
     for (int i = -100; i <= 100; ++i) {
       expectEquals32(i - 1, round32((float) i - 0.51f));
+      expectEquals32(i, round32((float) i - 0.5f));
       expectEquals32(i, round32((float) i));
       expectEquals32(i + 1, round32((float) i + 0.5f));
+      expectEquals32(i + 1, round32((float) i + 0.51f));
     }
     for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) {
       expectEquals32(-1, round32(f));
@@ -61,8 +74,10 @@
     float[] fvals = {
       -16777215.5f,
       -16777215.0f,
-      -0.4999f,
-      0.4999f,
+      -0.49999998f,
+      -0.4999999701976776123046875f,
+      0.4999999701976776123046875f,
+      0.49999998f,
       16777215.0f,
       16777215.5f
     };
@@ -71,6 +86,8 @@
       -16777215,
       0,
       0,
+      0,
+      0,
       16777215,
       16777216
     };
@@ -98,7 +115,8 @@
     expectEquals64(-2L, round64(-1.51d));
     expectEquals64(-1L, round64(-1.2d));
     expectEquals64(-1L, round64(-1.0d));
-    expectEquals64(-1L, round64(-0.51d));
+    expectEquals64(-1L, round64(-0.5000001d));
+    expectEquals64(0L, round64(-0.5d));
     expectEquals64(0L, round64(-0.2d));
     expectEquals64(0L, round64(-0.0d));
     expectEquals64(0L, round64(+0.0d));
@@ -109,11 +127,27 @@
     expectEquals64(2L, round64(+1.5d));
     expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY));
 
+    // Near minlong.
+    expectEquals64(-9223372036854775808L,
+        round64(Math.nextAfter(-9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(-9223372036854775808L, round64(-9223372036854775808.0));
+    expectEquals64(-9223372036854774784L,
+        round64(Math.nextAfter(-9223372036854775808.0, Double.POSITIVE_INFINITY)));
+
+    // Near maxlong.
+    expectEquals64(9223372036854774784L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.NEGATIVE_INFINITY)));
+    expectEquals64(9223372036854775807L, round64(9223372036854775808.0));
+    expectEquals64(9223372036854775807L,
+        round64(Math.nextAfter(9223372036854775808.0, Double.POSITIVE_INFINITY)));
+
     // Some others.
     for (long l = -100; l <= 100; ++l) {
       expectEquals64(l - 1, round64((double) l - 0.51d));
+      expectEquals64(l, round64((double) l - 0.5d));
+      expectEquals64(l, round64((double) l));
       expectEquals64(l + 1, round64((double) l + 0.5d));
-      expectEquals64(l + 1, round64((double) l + 0.5d));
+      expectEquals64(l + 1, round64((double) l + 0.51d));
     }
     for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) {
       expectEquals64(-1L, round64(d));
@@ -123,8 +157,10 @@
     double[] dvals = {
       -9007199254740991.5d,
       -9007199254740991.0d,
+      -0.49999999999999997d,
       -0.49999999999999994d,
       0.49999999999999994d,
+      0.49999999999999997d,
       9007199254740991.0d,
       9007199254740991.5d
     };
@@ -133,6 +169,8 @@
       -9007199254740991L,
       0L,
       0L,
+      0L,
+      0L,
       9007199254740991L,
       9007199254740992L
     };
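
Side note on the new rounding boundary expectations above (illustrative only, not part of the patch): Math.round returns the closest integer and rounds ties towards positive infinity, so -0.5 maps to 0 while anything strictly below -0.5 maps to -1, and results outside the int range clamp to Integer.MIN_VALUE/MAX_VALUE. A minimal stand-alone check, assuming only the standard library (the class name is made up):

class RoundBoundaryCheck {
  public static void main(String[] args) {
    System.out.println(Math.round(-0.5f));        // 0: the halfway case rounds towards +infinity.
    System.out.println(Math.round(-0.5000001f));  // -1: strictly below the halfway point.
    System.out.println(Math.round(0.5f));         // 1
    System.out.println(Math.round(Float.POSITIVE_INFINITY));  // 2147483647: clamped to Integer.MAX_VALUE.
    System.out.println(Math.round(-3.0e9f));                   // -2147483648: clamped to Integer.MIN_VALUE.
  }
}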
diff --git a/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
index 7dbd9da..186f0ab 100644
--- a/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
+++ b/test/588-checker-irreducible-lifetime-hole/smali/IrreducibleLoop.smali
@@ -16,7 +16,7 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
@@ -57,7 +57,7 @@
    return v0
 .end method
 
-## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination (before)
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
diff --git a/test/591-checker-regression-dead-loop/src/Main.java b/test/591-checker-regression-dead-loop/src/Main.java
index 6d9fcf8..19856cf 100644
--- a/test/591-checker-regression-dead-loop/src/Main.java
+++ b/test/591-checker-regression-dead-loop/src/Main.java
@@ -17,7 +17,7 @@
 class Main {
   private static boolean $inline$false() { return false; }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination$initial (before)
   /// CHECK-DAG:     <<Const0:i\d+>> IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Const0>>,<<Add:i\d+>>] loop:{{B\d+}}
diff --git a/test/593-checker-boolean-to-integral-conv/src/Main.java b/test/593-checker-boolean-to-integral-conv/src/Main.java
index ba65839..b4c91c8 100644
--- a/test/593-checker-boolean-to-integral-conv/src/Main.java
+++ b/test/593-checker-boolean-to-integral-conv/src/Main.java
@@ -46,7 +46,7 @@
   /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToS>>]
 
-  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -72,7 +72,7 @@
   /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToS>>]
 
-  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -98,7 +98,7 @@
   /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToC>>]
 
-  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -122,7 +122,7 @@
   /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
   /// CHECK-DAG:                            Return [<<Sel>>]
 
-  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:                            Return [<<Arg>>]
 
@@ -148,7 +148,7 @@
   /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
   /// CHECK-DAG:                            Return [<<IToJ>>]
 
-  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier$after_bce (after)
   /// CHECK:         <<Arg:z\d+>>           ParameterValue
   /// CHECK-DAG:     <<ZToJ:j\d+>>          TypeConversion [<<Arg>>]
   /// CHECK-DAG:                            Return [<<ZToJ>>]
@@ -185,7 +185,7 @@
   /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
   /// CHECK-DAG:                            Return [<<JToI>>]
 
-  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
   /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
   /// CHECK-DAG:                            Return [<<Sget>>]
diff --git a/test/594-checker-array-alias/expected.txt b/test/594-checker-array-alias/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/594-checker-array-alias/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/594-checker-array-alias/info.txt b/test/594-checker-array-alias/info.txt
new file mode 100644
index 0000000..57c6de5
--- /dev/null
+++ b/test/594-checker-array-alias/info.txt
@@ -0,0 +1 @@
+Tests on array parameters with and without aliasing.
diff --git a/test/594-checker-array-alias/src/Main.java b/test/594-checker-array-alias/src/Main.java
new file mode 100644
index 0000000..5ece2e2
--- /dev/null
+++ b/test/594-checker-array-alias/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+//
+// Test on array parameters with or without potential aliasing.
+//
+public class Main {
+
+  //
+  // Cross-over on parameters with potential aliasing.
+  // The arrays a and b may point to the same memory, which (without
+  // further runtime tests) prevents hoisting the seemingly invariant
+  // array reference.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop1(int a[], int b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop2(float a[], float b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop3(long a[], long b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop4(double a[], double b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // False cross-over on parameters. The parameters have the same width (which
+  // used to cause false type aliasing in an older version of the compiler),
+  // but since the types differ, they cannot alias. Thus, the invariant array
+  // reference can be hoisted.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop1(int a[], float b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (int) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop2(float a[], int b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop3(long a[], double b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (long) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop4(double a[], long b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    int[] aI = new int[100];
+    float[] aF = new float[100];
+    long[] aJ = new long[100];
+    double[] aD = new double[100];
+
+    // Type I.
+    CrossOverLoop1(aI, aI);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aI[i]);
+    }
+    // Type F.
+    CrossOverLoop2(aF, aF);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aF[i]);
+    }
+    // Type J.
+    CrossOverLoop3(aJ, aJ);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aJ[i]);
+    }
+    // Type D.
+    CrossOverLoop4(aD, aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aD[i]);
+    }
+
+    // Type I vs F.
+    FalseCrossOverLoop1(aI, aF);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(-106, aI[i]);
+    }
+    // Type F vs I.
+    FalseCrossOverLoop2(aF, aI);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(-106, aF[i]);
+    }
+    // Type J vs D.
+    FalseCrossOverLoop3(aJ, aD);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(-106, aJ[i]);
+    }
+    // Type D vs J.
+    FalseCrossOverLoop4(aD, aJ);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(-106, aD[i]);
+    }
+
+    // Real-world example where incorrect type assignment could introduce a bug.
+    // The library sorting algorithm is heavy on array reads and writes, and
+    // assigning the wrong J/D type to one of these would introduce errors.
+    for (int i = 0; i < aD.length; i++) {
+      aD[i] = aD.length - i - 1;
+    }
+    Arrays.sort(aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals((double) i, aD[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
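
A side note on why the b[20] load in the CrossOverLoop methods above must stay inside the loop (illustrative only; the class and method names below are made up for the sketch): when both parameters refer to the same array, the store performed by iteration i == 20 changes the value that later iterations read, so hoisting the seemingly invariant load would compute the wrong result. The expected values checked in main() (92 up to index 20, 85 afterwards) come from exactly this effect:

class AliasTrace {
  // Same shape as CrossOverLoop1 above.
  static void crossOver(int[] a, int[] b) {
    b[20] = 99;
    for (int i = 0; i < a.length; i++) {
      a[i] = b[20] - 7;  // Must re-read b[20]: the loop itself may have overwritten it.
    }
  }

  public static void main(String[] args) {
    int[] x = new int[100];
    crossOver(x, x);           // Both parameters alias the same array.
    System.out.println(x[0]);  // 92: b[20] still held 99.
    System.out.println(x[20]); // 92: written at i == 20 from the old value 99.
    System.out.println(x[21]); // 85: b[20] was overwritten with 92 at i == 20.
  }
}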
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
index f12fac9..068ad03 100644
--- a/test/595-profile-saving/run
+++ b/test/595-profile-saving/run
@@ -16,13 +16,12 @@
 
 # Use
 # --compiler-filter=interpret-only to make sure that the test is not compiled AOT
-# -XOatFileManagerCompilerFilter:interpret-only  to make sure the test is not compiled
-#   when loaded (by PathClassLoader)
+# and to make sure the test is not compiled when loaded (by PathClassLoader)
 # -Xjitsaveprofilinginfo to enable profile saving
 # -Xusejit:false to disable jit and only test profiles.
 exec ${RUN} \
   -Xcompiler-option --compiler-filter=interpret-only \
-  --runtime-option -XOatFileManagerCompilerFilter:interpret-only \
+  --runtime-option '-Xcompiler-option --compiler-filter=interpret-only' \
   --runtime-option -Xjitsaveprofilinginfo \
   --runtime-option -Xusejit:false \
   "${@}"
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
new file mode 100644
index 0000000..a5bbf5f
--- /dev/null
+++ b/test/596-app-images/app_images.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "image.h"
+#include "jni.h"
+#include "mirror/class.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageLoaded(JNIEnv*, jclass) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        return JNI_TRUE;
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageContains(JNIEnv*, jclass, jclass c) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass_ptr = soa.Decode<mirror::Class*>(c);
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        if (image_space->HasAddress(klass_ptr)) {
+          return JNI_TRUE;
+        }
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+}  // namespace
+
+}  // namespace art
diff --git a/test/596-app-images/expected.txt b/test/596-app-images/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/596-app-images/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/596-app-images/info.txt b/test/596-app-images/info.txt
new file mode 100644
index 0000000..a3d5e7e
--- /dev/null
+++ b/test/596-app-images/info.txt
@@ -0,0 +1 @@
+Tests that app-images are loaded and used.
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
new file mode 100644
index 0000000..75b31b8
--- /dev/null
+++ b/test/596-app-images/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  static class Inner {
+    public static int abc = 0;
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    if (!checkAppImageLoaded()) {
+      System.out.println("App image is not loaded!");
+    } else if (!checkAppImageContains(Inner.class)) {
+      System.out.println("App image does not contain Inner!");
+    }
+  }
+
+  public static native boolean checkAppImageLoaded();
+  public static native boolean checkAppImageContains(Class<?> klass);
+}
diff --git a/test/600-verifier-fails/expected.txt b/test/600-verifier-fails/expected.txt
index b0aad4d..974b995 100644
--- a/test/600-verifier-fails/expected.txt
+++ b/test/600-verifier-fails/expected.txt
@@ -1 +1,6 @@
-passed
+passed A
+passed B
+passed C
+passed D
+passed E
+passed F
diff --git a/test/600-verifier-fails/info.txt b/test/600-verifier-fails/info.txt
index 478dd9b..23f3ebc 100644
--- a/test/600-verifier-fails/info.txt
+++ b/test/600-verifier-fails/info.txt
@@ -1,4 +1,23 @@
-The situation in this test was discovered by running dexfuzz on
-another fuzzingly random generated Java test. The soft verification
-fail (on the final field modification) should not hide the hard
-verification fail (on the type mismatch) to avoid a crash later on.
+The situations in these tests were discovered by running the mutating
+dexfuzz on the DEX files of randomly generated Java tests.
+
+(A) b/28908555:
+    soft verification failure (on the final field modification) should
+    not hide the hard verification failure (on the type mismatch) to
+    avoid a compiler crash later on
+(B) b/29070461:
+    hard verification failure (not calling super in constructor) should
+    bail immediately and not allow soft verification failures to pile up
+    behind it to avoid a fatal message later on
+(C) b/29068831:
+    access validation on field should occur prior to null reference check
+(D) b/29126870:
+    soft verification failure (cannot access) should not hide the hard
+    verification failure (non-reference type) to avoid a compiler crash
+    later on
+(E) b/29068831:
+    access validation on method should occur prior to null reference check
+(F) b/29758098:
+    new-instance of java.lang.Class should throw an IllegalAccessError to
+    avoid an interpreter crash on a zero-size object later
+
diff --git a/test/955-lambda-smali/run b/test/600-verifier-fails/smali/class.smali
old mode 100755
new mode 100644
similarity index 66%
copy from test/955-lambda-smali/run
copy to test/600-verifier-fails/smali/class.smali
index 2fb2f89..b2eb254
--- a/test/955-lambda-smali/run
+++ b/test/600-verifier-fails/smali/class.smali
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+.class public LF;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    new-instance v0, Ljava/lang/Class;
+    return-void
+.end method
diff --git a/test/955-lambda-smali/run b/test/600-verifier-fails/smali/construct.smali
old mode 100755
new mode 100644
similarity index 66%
copy from test/955-lambda-smali/run
copy to test/600-verifier-fails/smali/construct.smali
index 2fb2f89..417ced9
--- a/test/955-lambda-smali/run
+++ b/test/600-verifier-fails/smali/construct.smali
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+.class public LB;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    if-eqz v0, :bail
+    invoke-direct {v0}, LB;->append(Ljava/lang/String;)V
+:bail
+    return-void
+.end method
diff --git a/test/955-lambda-smali/run b/test/600-verifier-fails/smali/iget.smali
old mode 100755
new mode 100644
similarity index 64%
copy from test/955-lambda-smali/run
copy to test/600-verifier-fails/smali/iget.smali
index 2fb2f89..5c045e6
--- a/test/955-lambda-smali/run
+++ b/test/600-verifier-fails/smali/iget.smali
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+.class public LD;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 2
+    iget v1, v0, LMain;->privateField:I
+    return-void
+.end method
diff --git a/test/955-lambda-smali/run b/test/600-verifier-fails/smali/invoke.smali
old mode 100755
new mode 100644
similarity index 64%
copy from test/955-lambda-smali/run
copy to test/600-verifier-fails/smali/invoke.smali
index 2fb2f89..616d63c
--- a/test/955-lambda-smali/run
+++ b/test/600-verifier-fails/smali/invoke.smali
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+.class public LE;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
+    invoke-virtual {v0}, LMain;->privateMethod()V
+    return-void
+.end method
diff --git a/test/955-lambda-smali/run b/test/600-verifier-fails/smali/iput.smali
old mode 100755
new mode 100644
similarity index 62%
copy from test/955-lambda-smali/run
copy to test/600-verifier-fails/smali/iput.smali
index 2fb2f89..bd8b928
--- a/test/955-lambda-smali/run
+++ b/test/600-verifier-fails/smali/iput.smali
@@ -1,6 +1,5 @@
-#!/bin/bash
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --experimental lambdas
+.class public LC;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
+    iput-object v0, v0, LMain;->staticPrivateField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/sput.smali b/test/600-verifier-fails/smali/sput.smali
index 87f3799..e8e56ac 100644
--- a/test/600-verifier-fails/smali/sput.smali
+++ b/test/600-verifier-fails/smali/sput.smali
@@ -18,6 +18,6 @@
 
 .method public foo(I)V
 .registers 2
-    sput v1, LMain;->staticField:Ljava/lang/String;
+    sput v1, LMain;->staticFinalField:Ljava/lang/String;
     return-void
 .end method
diff --git a/test/600-verifier-fails/src/Main.java b/test/600-verifier-fails/src/Main.java
index ba4cc31..1726bc4 100644
--- a/test/600-verifier-fails/src/Main.java
+++ b/test/600-verifier-fails/src/Main.java
@@ -14,17 +14,31 @@
  * limitations under the License.
  */
 
-import java.lang.reflect.Method;
-
 public class Main {
 
-  public static final String staticField = null;
+  public static final String staticFinalField = null;
+
+  private static String staticPrivateField = null;
+
+  private int privateField = 0;
+
+  private void privateMethod() { }
+
+  private static void test(String name) throws Exception {
+    try {
+      Class<?> a = Class.forName(name);
+      a.newInstance();
+    } catch (java.lang.LinkageError e) {
+      System.out.println("passed " + name);
+    }
+  }
 
   public static void main(String[] args) throws Exception {
-    try {
-      Class<?> a = Class.forName("A");
-    } catch (java.lang.VerifyError e) {
-      System.out.println("passed");
-    }
+    test("A");
+    test("B");
+    test("C");
+    test("D");
+    test("E");
+    test("F");
   }
 }
diff --git a/test/601-method-access/src/Main.java b/test/601-method-access/src/Main.java
index 838080a..9d9e568 100644
--- a/test/601-method-access/src/Main.java
+++ b/test/601-method-access/src/Main.java
@@ -22,7 +22,7 @@
 public class Main {
   public static void main(String[] args) {
     try {
-      Class c = Class.forName("SubClassUsingInaccessibleMethod");
+      Class<?> c = Class.forName("SubClassUsingInaccessibleMethod");
       Object o = c.newInstance();
       c.getMethod("test").invoke(o, null);
     } catch (InvocationTargetException ite) {
diff --git a/test/602-deoptimizeable/expected.txt b/test/602-deoptimizeable/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/602-deoptimizeable/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/602-deoptimizeable/info.txt b/test/602-deoptimizeable/info.txt
new file mode 100644
index 0000000..d0952f9
--- /dev/null
+++ b/test/602-deoptimizeable/info.txt
@@ -0,0 +1 @@
+Test various cases for full/partial-fragment deoptimization.
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
new file mode 100644
index 0000000..743a579
--- /dev/null
+++ b/test/602-deoptimizeable/src/Main.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+
+class DummyObject {
+    public static boolean sHashCodeInvoked = false;
+    private int i;
+
+    public DummyObject(int i) {
+        this.i = i;
+    }
+
+    public boolean equals(Object obj) {
+        return (obj instanceof DummyObject) && (i == ((DummyObject)obj).i);
+    }
+
+    public int hashCode() {
+        sHashCodeInvoked = true;
+        Main.assertIsManaged();
+        Main.deoptimizeAll();
+        Main.assertIsInterpreted();
+        Main.assertCallerIsManaged();  // Caller is framework code (HashMap).
+        return i % 64;
+    }
+}
+
+public class Main {
+    static boolean sFlag = false;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+    public static native void assertIsInterpreted();
+    public static native void assertIsManaged();
+    public static native void assertCallerIsInterpreted();
+    public static native void assertCallerIsManaged();
+    public static native void disableStackFrameAsserts();
+    public static native boolean hasOatFile();
+    public static native boolean isInterpreted();
+
+    public static void execute(Runnable runnable) throws Exception {
+      Thread t = new Thread(runnable);
+      t.start();
+      t.join();
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        // Only test stack frames in compiled mode.
+        if (!hasOatFile() || isInterpreted()) {
+          disableStackFrameAsserts();
+        }
+        final HashMap<DummyObject, Long> map = new HashMap<DummyObject, Long>();
+
+        // Single-frame deoptimization that covers partial fragment.
+        execute(new Runnable() {
+            public void run() {
+                int[] arr = new int[3];
+                assertIsManaged();
+                int res = $noinline$run1(arr);
+                assertIsManaged();  // Only single frame is deoptimized.
+                if (res != 79) {
+                    System.out.println("Failure 1!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        // Single-frame deoptimization that covers a full fragment.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    int[] arr = new int[3];
+                    assertIsManaged();
+                    // Use reflection to call $noinline$run2 so that it does
+                    // full-fragment deoptimization since that is an upcall.
+                    Class<?> cls = Class.forName("Main");
+                    Method method = cls.getDeclaredMethod("$noinline$run2", int[].class);
+                    double res = (double)method.invoke(Main.class, arr);
+                    assertIsManaged();  // Only single frame is deoptimized.
+                    if (res != 79.3d) {
+                        System.out.println("Failure 2!");
+                        System.exit(0);
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        // Full-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                assertIsManaged();
+                float res = $noinline$run3B();
+                assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                if (res != 0.034f) {
+                    System.out.println("Failure 3!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        // Partial-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    assertIsManaged();
+                    map.put(new DummyObject(10), Long.valueOf(100));
+                    assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        if (!DummyObject.sHashCodeInvoked) {
+            System.out.println("hashCode() method not invoked!");
+        }
+        if (map.get(new DummyObject(10)) != 100) {
+            System.out.println("Wrong hashmap value!");
+        }
+        System.out.println("Finishing");
+    }
+
+    public static int $noinline$run1(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
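+        // The compiler drops the per-access bounds checks and guards the block with a
+        // deoptimization point instead, so the out-of-range store below drops this
+        // frame to the interpreter.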
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted(); // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79;
+    }
+
+    public static double $noinline$run2(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted();  // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79.3d;
+    }
+
+    public static float $noinline$run3A() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        // Deoptimize callers.
+        deoptimizeAll();
+        assertIsInterpreted();
+        assertCallerIsInterpreted();  // $noinline$run3B is deoptimizeable.
+        return 0.034f;
+    }
+
+    public static float $noinline$run3B() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        float res = $noinline$run3A();
+        assertIsInterpreted();
+        return res;
+    }
+}
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
index 04d7cd6..a26623c 100644
--- a/test/604-hot-static-interface/src/Main.java
+++ b/test/604-hot-static-interface/src/Main.java
@@ -29,7 +29,7 @@
     }
   }
 
-  private static native void ensureJitCompiled(Class itf, String method_name);
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
 }
 
 interface Itf {
diff --git a/test/605-new-string-from-bytes/src/Main.java b/test/605-new-string-from-bytes/src/Main.java
index bd24b50..5bd6c5d 100644
--- a/test/605-new-string-from-bytes/src/Main.java
+++ b/test/605-new-string-from-bytes/src/Main.java
@@ -20,7 +20,7 @@
 public class Main {
 
   public static void main(String[] args) throws Exception {
-    Class c = Class.forName("java.lang.StringFactory");
+    Class<?> c = Class.forName("java.lang.StringFactory");
     Method m = c.getDeclaredMethod("newStringFromBytes", byte[].class, int.class);
 
     // Loop over allocations to get more chances of doing GC while in the
@@ -37,6 +37,8 @@
         } else {
           throw e;
         }
+      } catch (OutOfMemoryError e) {
+        // Ignore, this is a stress test.
       }
     }
   }
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/607-daemon-stress/expected.txt
similarity index 100%
rename from test/525-checker-arrays-and-fields/expected.txt
rename to test/607-daemon-stress/expected.txt
diff --git a/test/607-daemon-stress/info.txt b/test/607-daemon-stress/info.txt
new file mode 100644
index 0000000..1047b76
--- /dev/null
+++ b/test/607-daemon-stress/info.txt
@@ -0,0 +1,3 @@
+Stress test for daemon threads stuck in a method that requires the thread list lock
+(for example Thread.isInterrupted). The shutdown thread used to block those daemons
+from making progress.
diff --git a/test/607-daemon-stress/src/Main.java b/test/607-daemon-stress/src/Main.java
new file mode 100644
index 0000000..56ef410
--- /dev/null
+++ b/test/607-daemon-stress/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main extends Thread {
+  public static void main(String[] args) throws Exception {
+    for (int i = 0; i < 5; i++) {
+      Main m = new Main();
+      m.setDaemon(true);
+      m.start();
+    }
+    // Sleep a while to give some time for the threads to start.
+    Thread.sleep(1000);
+  }
+
+  public void run() {
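+    // Spin on Thread.isInterrupted(), which requires the thread list lock (see
+    // info.txt); runtime shutdown must not block these daemons from making progress.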
+    while (!isInterrupted());
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/609-checker-inline-interface/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/609-checker-inline-interface/expected.txt
diff --git a/test/609-checker-inline-interface/info.txt b/test/609-checker-inline-interface/info.txt
new file mode 100644
index 0000000..35eee08
--- /dev/null
+++ b/test/609-checker-inline-interface/info.txt
@@ -0,0 +1,2 @@
+Checker test that we inline interface calls and that, when we cannot
+inline them, we turn them into a virtual invoke.
diff --git a/test/609-checker-inline-interface/src/Main.java b/test/609-checker-inline-interface/src/Main.java
new file mode 100644
index 0000000..413f2dd
--- /dev/null
+++ b/test/609-checker-inline-interface/src/Main.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Main implements Interface {
+
+  static void methodWithInvokeInterface(Interface interf) {
+    interf.doCall();
+  }
+
+  public void doCall() {
+    if (doThrow) throw new Error("");
+  }
+
+  public static void main(String[] args) {
+    testInlineInterfaceCall();
+    testInterfaceToVirtualCall();
+  }
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect method_name:Main.methodWithInvokeInterface
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (before)
+  /// CHECK-NOT:                      InvokeInterface
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (after)
+  /// CHECK:                          InvokeInterface method_name:Interface.doCall
+
+  /// CHECK-START: void Main.testInlineInterfaceCall() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  public static void testInlineInterfaceCall() {
+    methodWithInvokeInterface(itf);
+  }
+
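+  // Below, methodWithInvokeInterface is inlined but doCall itself is not. Because the
+  // static field 'm' is declared as Main, the exposed InvokeInterface can still be
+  // sharpened into an InvokeVirtual.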
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect method_name:Main.methodWithInvokeInterface
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (before)
+  /// CHECK-NOT:                      InvokeInterface
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (after)
+  /// CHECK:                          InvokeVirtual method_name:Main.doCall
+
+  /// CHECK-START: void Main.testInterfaceToVirtualCall() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+  public static void testInterfaceToVirtualCall() {
+    methodWithInvokeInterface(m);
+  }
+
+  static Interface itf = new Main();
+  static Main m = new Main();
+  static boolean doThrow = false;
+}
+
+interface Interface {
+  public void doCall();
+}
diff --git a/test/609-checker-x86-bounds-check/expected.txt b/test/609-checker-x86-bounds-check/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/609-checker-x86-bounds-check/info.txt b/test/609-checker-x86-bounds-check/info.txt
new file mode 100644
index 0000000..c0f26d0
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/info.txt
@@ -0,0 +1 @@
+Checker test that we combine ArrayLength and BoundsCheck on x86/x86_64.
diff --git a/test/609-checker-x86-bounds-check/src/Main.java b/test/609-checker-x86-bounds-check/src/Main.java
new file mode 100644
index 0000000..bfc2be8
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/src/Main.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    int[] array = new int[51];
+    testArrayLengthBoundsCheckX86(array, 10);
+
+    System.out.println("passed");
+  }
+
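+  // After x86_memory_operand_generation the ArrayLength is marked emitted_at_use, so
+  // the bounds check compares the index directly against the in-memory length field
+  // ([array + 8]) and the store addresses the element data at [array + index * 4 + 12],
+  // as the disassembly checks below verify.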
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], <<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + <<IndexReg>> * 4 + 12], 9
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  // Test assumes parameter value is in lower 8 registers (it is passed in edx).
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], e<<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + r<<IndexReg>> * 4 + 12], 9
+
+  static void testArrayLengthBoundsCheckX86(int[] array, int index) {
+    array[index] = 9;
+  }
+}
diff --git a/test/611-checker-simplify-if/expected.txt b/test/611-checker-simplify-if/expected.txt
new file mode 100644
index 0000000..3083c4c
--- /dev/null
+++ b/test/611-checker-simplify-if/expected.txt
@@ -0,0 +1,7 @@
+54
+54
+54
+12
+12
+12
+33
diff --git a/test/611-checker-simplify-if/info.txt b/test/611-checker-simplify-if/info.txt
new file mode 100644
index 0000000..b090db8
--- /dev/null
+++ b/test/611-checker-simplify-if/info.txt
@@ -0,0 +1 @@
+Checker tests for the 'if' simplification in the compiler.
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
new file mode 100644
index 0000000..7dac007
--- /dev/null
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testNoInline(args);
+    System.out.println(staticField);
+    testInline(args);
+    System.out.println(staticField);
+    testNonConstantInputs(args);
+    System.out.println(staticField);
+    testNonConstantEqual(args);
+    System.out.println(staticField);
+    testGreaterCondition(args);
+    System.out.println(staticField);
+    testSwitch(args);
+    System.out.println(staticField);
+    testFP(args);
+    System.out.println(staticField);
+  }
+
+  // Test when a condition is the input of the if.
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK:                   If
+  /// CHECK: <<Phi:i\d+>>      Phi
+  /// CHECK: <<Equal:z\d+>>    Equal [<<Phi>>,<<Const0>>]
+  /// CHECK:                   If [<<Equal>>]
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  Equal
+  /// CHECK-NOT:  If
+  public static void testNoInline(String[] args) {
+    boolean myVar = false;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when the phi is the input of the if.
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
+  /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                   If
+  /// CHECK-DAG: <<Phi:i\d+>>      Phi
+  /// CHECK-DAG:                   If [<<Phi>>]
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  If
+  public static void testInline(String[] args) {
+    boolean myVar = $inline$doTest(args);
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  public static boolean $inline$doTest(String[] args) {
+    boolean myVar;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    return myVar;
+  }
+
+  // Test when one input is not a constant. We can only optimize the constant input.
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<StaticFieldGet>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantInputs(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = otherStaticField;
+    }
+    if (a == 42) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test with a comparison (GreaterThanOrEqual) condition.
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const22:i\d+>>         IntConstant 22
+  /// CHECK-DAG: <<Const25:i\d+>>         IntConstant 25
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<Const22>>]
+  /// CHECK-DAG: <<GE:z\d+>>              GreaterThanOrEqual [<<Phi>>,<<Const25>>]
+  /// CHECK-DAG:                          If [<<GE>>]
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG:                          If
+  /// CHECK-NOT:                          Phi
+  /// CHECK-NOT:                          GreaterThanOrEqual
+  /// CHECK-NOT:                          If
+  public static void testGreaterCondition(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = 22;
+    }
+    if (a < 25) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when comparing non-constant values.
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantEqual(String[] args) {
+    int a = 42;
+    int b = otherStaticField;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = b;
+    }
+    if (a == b) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Make sure we don't "simplify" a loop and potentially turn it into
+  // an irreducible loop. The suspend check at the loop header prevents
+  // us from doing the simplification.
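+  // (An irreducible loop is a loop that can be entered at more than one point.)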
+
+  /// CHECK-START: void Main.testLoop(boolean) disassembly (after)
+  /// CHECK-DAG: SuspendCheck
+  /// CHECK:     irreducible:false
+  /// CHECK-NOT: irreducible:true
+  public static void testLoop(boolean c) {
+    while (true) {
+      if (c) {
+        if ($noinline$foo()) return;
+        c = false;
+      } else {
+        $noinline$foo();
+        c = true;
+      }
+    }
+  }
+
+  static boolean $noinline$foo() {
+    if (doThrow) throw new Error("");
+    return true;
+  }
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK-NOT:  If
+  public static void testSwitch(String[] args) {
+    boolean cond = false;
+    switch (args.length) {
+      case 42:
+        staticField = 11;
+        cond = true;
+        break;
+      case 43:
+        staticField = 33;
+        cond = true;
+        break;
+      default:
+        cond = false;
+        break;
+    }
+    if (cond) {
+      // Redirect case 42 and 43 here.
+      staticField = 2;
+    }
+    // Redirect default here.
+  }
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (before)
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination$initial (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  public static void testFP(String[] args) {
+    float f = 2.2f;
+    float nan = $noinline$getNaN();
+    if (args.length == 42) {
+      f = 4.3f;
+    } else {
+      staticField = 33;
+      f = nan;
+    }
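+    // NaN compares unequal to everything, including itself, so the branch below is
+    // never taken at runtime (hence the expected output of 33 from the else branch),
+    // and the comparison cannot be folded away by the simplifier.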
+    if (f == nan) {
+      staticField = 5;
+    }
+  }
+
+  // Not inlined, so the compiler cannot see that the returned value is a NaN.
+  static float $noinline$getNaN() {
+    if (doThrow) throw new Error("");
+    return Float.NaN;
+  }
+
+  static boolean doThrow;
+  static int staticField;
+  static int otherStaticField;
+}
diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
index 1d6158a..fcb314d 100644
--- a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
+++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -29,7 +29,7 @@
 }
 
 class OtherClass {
-  public static Class getB() {
+  public static Class<?> getB() {
     // This used to return the B class of another class loader.
     return B.class;
   }
diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java
index 0e4bd22..89ebe09 100644
--- a/test/612-jit-dex-cache/src/Main.java
+++ b/test/612-jit-dex-cache/src/Main.java
@@ -41,7 +41,7 @@
 
 public class Main {
 
-   private static Class classFromDifferentLoader() throws Exception {
+   private static Class<?> classFromDifferentLoader() throws Exception {
      final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar";
      ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
      return loader.loadClass("LoadedByAppClassLoader");
@@ -49,7 +49,7 @@
 
   public static void main(String[] args) throws Exception {
     System.loadLibrary(args[0]);
-    Class cls = classFromDifferentLoader();
+    Class<?> cls = classFromDifferentLoader();
     Method m = cls.getDeclaredMethod("letMeInlineYou", A.class);
     B b = new B();
     // Invoke the method enough times to get an inline cache and get JITted.
@@ -63,5 +63,5 @@
     }
   }
 
-  public static native void ensureJitCompiled(Class cls, String method_name);
+  public static native void ensureJitCompiled(Class<?> cls, String method_name);
 }
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/614-checker-dump-constant-location/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/614-checker-dump-constant-location/expected.txt
diff --git a/test/614-checker-dump-constant-location/info.txt b/test/614-checker-dump-constant-location/info.txt
new file mode 100644
index 0000000..4a94ffa
--- /dev/null
+++ b/test/614-checker-dump-constant-location/info.txt
@@ -0,0 +1,2 @@
+Test that the graph visualizer outputs useful information for constant
+locations in parallel moves.
diff --git a/test/614-checker-dump-constant-location/src/Main.java b/test/614-checker-dump-constant-location/src/Main.java
new file mode 100644
index 0000000..f6bc063
--- /dev/null
+++ b/test/614-checker-dump-constant-location/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static int array_int[] = { 0 };
+  public static long array_long[] = { 0 };
+  public static float array_float[] = { 0.0f };
+  public static double array_double[] = { 0.0 };
+
+  // The code used to print constant locations in parallel moves is architecture
+  // independent. We only test ARM and ARM64 because checking is easy there: 'store'
+  // instructions only take registers as sources.
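+  // In the ParallelMove dump a constant source is printed as '#<value>', so the
+  // patterns below check that 1, 2, 3.3 and 4.4 all appear as move sources.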
+
+  /// CHECK-START-ARM: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  /// CHECK-START-ARM64: void Main.store_to_arrays() register (after)
+  /// CHECK:    ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}}
+
+  public void store_to_arrays() {
+    array_int[0] = 1;
+    array_long[0] = 2;
+    array_float[0] = 3.3f;
+    array_double[0] = 4.4;
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/615-checker-arm64-zr-parallel-move/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/615-checker-arm64-zr-parallel-move/expected.txt
diff --git a/test/615-checker-arm64-zr-parallel-move/info.txt b/test/615-checker-arm64-zr-parallel-move/info.txt
new file mode 100644
index 0000000..199755d
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/info.txt
@@ -0,0 +1 @@
+Checker test to verify we correctly use wzr and xzr to synthesize zero constants.
diff --git a/test/615-checker-arm64-zr-parallel-move/src/Main.java b/test/615-checker-arm64-zr-parallel-move/src/Main.java
new file mode 100644
index 0000000..5024f28
--- /dev/null
+++ b/test/615-checker-arm64-zr-parallel-move/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static boolean doThrow = false;
+
+  public void $noinline$foo(int in_w1,
+                            int in_w2,
+                            int in_w3,
+                            int in_w4,
+                            int in_w5,
+                            int in_w6,
+                            int in_w7,
+                            int on_stack_int,
+                            long on_stack_long,
+                            float in_s0,
+                            float in_s1,
+                            float in_s2,
+                            float in_s3,
+                            float in_s4,
+                            float in_s5,
+                            float in_s6,
+                            float in_s7,
+                            float on_stack_float,
+                            double on_stack_double) {
+    if (doThrow) throw new Error();
+  }
+
+  // We expect a parallel move that moves the zero constant four times to stack locations.
+  /// CHECK-START-ARM64: void Main.bar() register (after)
+  /// CHECK:             ParallelMove {{.*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*}}
+
+  // Those four moves should generate four 'store' instructions that use the zero register directly.
+  /// CHECK-START-ARM64: void Main.bar() disassembly (after)
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} wzr, [sp, #{{[0-9]+}}]
+  /// CHECK-DAG:         {{(str|stur)}} xzr, [sp, #{{[0-9]+}}]
+
+  public void bar() {
+    $noinline$foo(1, 2, 3, 4, 5, 6, 7,     // Integral values in registers.
+                  0, 0L,                   // Integral values on the stack.
+                  1, 2, 3, 4, 5, 6, 7, 8,  // Floating-point values in registers.
+                  0.0f, 0.0);              // Floating-point values on the stack.
+  }
+
+  public static void main(String args[]) {}
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 8473e06..3bb3725 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -69,4 +69,5 @@
 b/28187158
 b/29778499 (1)
 b/29778499 (2)
+b/30458218
 Done!
diff --git a/test/800-smali/smali/B30458218.smali b/test/800-smali/smali/B30458218.smali
new file mode 100644
index 0000000..67b882a
--- /dev/null
+++ b/test/800-smali/smali/B30458218.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB30458218;
+.super Ljava/io/InterruptedIOException;
+
+.method public static run()V
+    .registers 2
+    new-instance v0, LB30458218;
+    invoke-direct {v0}, LB30458218;-><init>()V
+
+    # IGET used to wrongly cache 'InterruptedIOException' class under the key 'LB30458218;'
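+    # (bytesTransferred is inherited from java.io.InterruptedIOException, which is what
+    # made the wrong cache key observable.)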
+    iget v1, v0, LB30458218;->bytesTransferred:I
+
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_28187158.smali b/test/800-smali/smali/b_28187158.smali
index 14d5cec..47e5ef6 100644
--- a/test/800-smali/smali/b_28187158.smali
+++ b/test/800-smali/smali/b_28187158.smali
@@ -9,4 +9,3 @@
    iget v0, p0, Ljava/lang/System;->in:Ljava/io/InputStream;
    return-void
 .end method
-
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 75b110b..34f2580 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -174,12 +174,13 @@
         testCases.add(new TestCase("b/27799205 (5)", "B27799205Helper", "run5", null,
                 new VerifyError(), null));
         testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
-        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null} ,
+        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null },
                 new VerifyError(), null));
         testCases.add(new TestCase("b/29778499 (1)", "B29778499_1", "run", null,
                 new IncompatibleClassChangeError(), null));
         testCases.add(new TestCase("b/29778499 (2)", "B29778499_2", "run", null,
                 new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/30458218", "B30458218", "run", null, null, null));
     }
 
     public void runTests() {
diff --git a/test/955-lambda-smali/build b/test/900-hello-plugin/build
similarity index 80%
rename from test/955-lambda-smali/build
rename to test/900-hello-plugin/build
index 14230c2..898e2e5 100755
--- a/test/955-lambda-smali/build
+++ b/test/900-hello-plugin/build
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2015 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# make us exit on a failure
-set -e
-
-./default-build "$@" --experimental default-methods
+./default-build "$@" --experimental agents
diff --git a/test/900-hello-plugin/expected.txt b/test/900-hello-plugin/expected.txt
new file mode 100644
index 0000000..43db31c
--- /dev/null
+++ b/test/900-hello-plugin/expected.txt
@@ -0,0 +1,8 @@
+ArtPlugin_Initialize called in test 900
+Agent_OnLoad called with options "test_900"
+GetEnvHandler called in test 900
+GetEnvHandler called with version 0x900fffff
+GetEnv returned '900' environment!
+Hello, world!
+Agent_OnUnload called
+ArtPlugin_Deinitialize called in test 900
diff --git a/test/900-hello-plugin/info.txt b/test/900-hello-plugin/info.txt
new file mode 100644
index 0000000..47b15c2
--- /dev/null
+++ b/test/900-hello-plugin/info.txt
@@ -0,0 +1,2 @@
+Tests that agents and plugins are loaded.
+
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
new file mode 100644
index 0000000..a38cc3d
--- /dev/null
+++ b/test/900-hello-plugin/load_unload.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <jni.h>
+#include <stdio.h>
+
+#include "art_method-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace art {
+
+constexpr jint TEST_900_ENV_VERSION_NUMBER = 0x900FFFFF;
+constexpr uintptr_t ENV_VALUE = 900;
+
+// Allow this library to be used as a plugin too so we can test the stack.
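+// ArtPlugin_Initialize registers this handler through AddEnvironmentHook, and
+// Agent_OnLoad later calls GetEnv with TEST_900_ENV_VERSION_NUMBER to check that the
+// hook is consulted and hands back ENV_VALUE.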
+static jint GetEnvHandler(JavaVMExt* vm ATTRIBUTE_UNUSED, void** new_env, jint version) {
+  printf("%s called in test 900\n", __func__);
+  if (version != TEST_900_ENV_VERSION_NUMBER) {
+    return JNI_EVERSION;
+  }
+  printf("GetEnvHandler called with version 0x%x\n", version);
+  *new_env = reinterpret_cast<void*>(ENV_VALUE);
+  return JNI_OK;
+}
+
+extern "C" bool ArtPlugin_Initialize() {
+  printf("%s called in test 900\n", __func__);
+  Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  return true;
+}
+
+extern "C" bool ArtPlugin_Deinitialize() {
+  printf("%s called in test 900\n", __func__);
+  return true;
+}
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm,
+                                               char* options,
+                                               void* reserved ATTRIBUTE_UNUSED) {
+  printf("Agent_OnLoad called with options \"%s\"\n", options);
+  uintptr_t env = 0;
+  jint res = vm->GetEnv(reinterpret_cast<void**>(&env), TEST_900_ENV_VERSION_NUMBER);
+  if (res != JNI_OK) {
+    printf("GetEnv(TEST_900_ENV_VERSION_NUMBER) returned non-zero\n");
+  }
+  printf("GetEnv returned '%" PRIdPTR "' environment!\n", env);
+  return 0;
+}
+
+extern "C" JNIEXPORT void JNICALL Agent_OnUnload(JavaVM* vm ATTRIBUTE_UNUSED) {
+  printf("Agent_OnUnload called\n");
+}
+
+}  // namespace art
diff --git a/test/955-lambda-smali/build b/test/900-hello-plugin/run
similarity index 60%
copy from test/955-lambda-smali/build
copy to test/900-hello-plugin/run
index 14230c2..35b0871 100755
--- a/test/955-lambda-smali/build
+++ b/test/900-hello-plugin/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2015 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# make us exit on a failure
-set -e
-
-./default-build "$@" --experimental default-methods
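+# Use the debug agent by default; an optimized (-O) run loads the non-debug build.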
+plugin=libartagentd.so
+if  [[ "$@" == *"-O"* ]]; then
+  plugin=libartagent.so
+fi
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${plugin}=test_900 \
+                   --android-runtime-option -Xplugin:${plugin}
diff --git a/test/900-hello-plugin/src/Main.java b/test/900-hello-plugin/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/900-hello-plugin/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
new file mode 100644
index 0000000..81a1b66
--- /dev/null
+++ b/test/901-hello-ti-agent/basics.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "901-hello-ti-agent/basics.h"
+
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "base/macros.h"
+#include "openjdkjvmti/jvmti.h"
+
+namespace art {
+namespace Test901HelloTi {
+
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  printf("Loaded Agent for test 901-hello-ti-agent\n");
+  fsync(1);
+  jvmtiEnv* env = nullptr;
+  jvmtiEnv* env2 = nullptr;
+
+#define CHECK_CALL_SUCCESS(c) \
+  do { \
+    if (c != JNI_OK) { \
+      printf("call " #c " did not succeed\n"); \
+      return -1; \
+    } \
+  } while (false)
+
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env), JVMTI_VERSION_1_0));
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env2), JVMTI_VERSION_1_0));
+  if (env == env2) {
+    printf("GetEnv returned same environment twice!\n");
+    return -1;
+  }
+  unsigned char* local_data = nullptr;
+  CHECK_CALL_SUCCESS(env->Allocate(8, &local_data));
+  strcpy(reinterpret_cast<char*>(local_data), "hello!!");
+  CHECK_CALL_SUCCESS(env->SetEnvironmentLocalStorage(local_data));
+  unsigned char* get_data = nullptr;
+  CHECK_CALL_SUCCESS(env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != local_data) {
+    printf("Got different data from local storage then what was set!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env2->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != nullptr) {
+    printf("env2 did not have nullptr local storage.\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->Deallocate(local_data));
+  jint version = 0;
+  CHECK_CALL_SUCCESS(env->GetVersionNumber(&version));
+  if ((version & JVMTI_VERSION_1) != JVMTI_VERSION_1) {
+    printf("Unexpected version number!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->DisposeEnvironment());
+  CHECK_CALL_SUCCESS(env2->DisposeEnvironment());
+#undef CHECK_CALL_SUCCESS
+  return JNI_OK;
+}
+
+
+}  // namespace Test901HelloTi
+}  // namespace art
diff --git a/test/901-hello-ti-agent/basics.h b/test/901-hello-ti-agent/basics.h
new file mode 100644
index 0000000..f482950
--- /dev/null
+++ b/test/901-hello-ti-agent/basics.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
+#define ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
+
+#include <jni.h>
+
+namespace art {
+namespace Test901HelloTi {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test901HelloTi
+}  // namespace art
+
+#endif  // ART_TEST_901_HELLO_TI_AGENT_BASICS_H_
diff --git a/test/955-lambda-smali/build b/test/901-hello-ti-agent/build
similarity index 80%
copy from test/955-lambda-smali/build
copy to test/901-hello-ti-agent/build
index 14230c2..898e2e5 100755
--- a/test/955-lambda-smali/build
+++ b/test/901-hello-ti-agent/build
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2015 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# make us exit on a failure
-set -e
-
-./default-build "$@" --experimental default-methods
+./default-build "$@" --experimental agents
diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt
new file mode 100644
index 0000000..414eb3b
--- /dev/null
+++ b/test/901-hello-ti-agent/expected.txt
@@ -0,0 +1,2 @@
+Loaded Agent for test 901-hello-ti-agent
+Hello, world!
diff --git a/test/901-hello-ti-agent/info.txt b/test/901-hello-ti-agent/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/901-hello-ti-agent/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/901-hello-ti-agent/run b/test/901-hello-ti-agent/run
new file mode 100755
index 0000000..8079a8c
--- /dev/null
+++ b/test/901-hello-ti-agent/run
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=901-hello-ti-agent \
+                   --android-runtime-option -Xplugin:${plugin}
diff --git a/test/901-hello-ti-agent/src/Main.java b/test/901-hello-ti-agent/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/901-hello-ti-agent/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
deleted file mode 100644
index 16381e4..0000000
--- a/test/955-lambda-smali/expected.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-SanityCheck
-Hello world! (0-args, no closure)
-ABCD Hello world! (4-args, no closure)
-Caught NPE
-(BoxUnbox) Hello boxing world! (0-args, no closure)
-(BoxUnbox) Boxing repeatedly yields referentially-equal objects
-(BoxUnbox) Caught NPE for unbox-lambda
-(BoxUnbox) Caught NPE for box-lambda
-(BoxUnbox) Caught ClassCastException for unbox-lambda
-(MoveResult) testZ success
-(MoveResult) testB success
-(MoveResult) testS success
-(MoveResult) testI success
-(MoveResult) testC success
-(MoveResult) testJ success
-(MoveResult) testF success
-(MoveResult) testD success
-(MoveResult) testL success
-(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true
-(CaptureVariables) (0-args, 1 captured variable 'B'): value is R
-(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂
-(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000
-(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678
-(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742
-(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity
-(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
-(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
-(CaptureVariables) Caught NPE
diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt
deleted file mode 100644
index aed5e84..0000000
--- a/test/955-lambda-smali/info.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Smali-based tests for experimental lambda intructions.
-
-Obviously needs to run under ART.
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
deleted file mode 100644
index 915de2d..0000000
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ /dev/null
@@ -1,168 +0,0 @@
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LBoxUnbox;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-    .registers 0
-
-    invoke-static {}, LBoxUnbox;->testBox()V
-    invoke-static {}, LBoxUnbox;->testBoxEquality()V
-    invoke-static {}, LBoxUnbox;->testFailures()V
-    invoke-static {}, LBoxUnbox;->testFailures2()V
-    invoke-static {}, LBoxUnbox;->testFailures3()V
-    invoke-static {}, LBoxUnbox;->forceGC()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(J)V
-    .registers 4 # 1 wide parameters, 2 locals
-
-    const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test boxing and unboxing; the same lambda should be invoked as if there was no box.
-.method private static testBox()V
-    .registers 3
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    unbox-lambda v0, v2, J # v0 = unbox(v2)
-    invoke-lambda v0, {}
-
-    return-void
-.end method
-
-# Test that boxing the same lambda twice yield the same object.
-.method private static testBoxEquality()V
-   .registers 6 # 0 parameters, 6 locals
-
-    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
-    box-lambda v2, v0 # v2 = box(v0)
-    box-lambda v3, v0 # v3 = box(v0)
-
-    # The objects should be not-null, and they should have the same reference
-    if-eqz v2, :is_zero
-    if-ne v2, v3, :is_not_equal
-
-    const-string v4, "(BoxUnbox) Boxing repeatedly yields referentially-equal objects"
-    goto :end
-
-:is_zero
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: boxing returned null"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(BoxUnbox) Boxing repeatedly FAILED: objects were not same reference"
-    goto :end
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-.end method
-
-# Test exceptions are thrown as expected when used opcodes incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    unbox-lambda v2, v0, J
-    # attempting to unbox a null lambda will throw NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test exceptions are thrown as expected when used opcodes incorrectly
-.method private static testFailures2()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    box-lambda v2, v0  # attempting to box a null lambda will throw NPE
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught NPE for box-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
-
-# Test exceptions are thrown as expected when used opcodes incorrectly
-.method private static testFailures3()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const-string v0, "This is not a boxed lambda"
-:start
-    # TODO: use \FunctionalType; here instead
-    unbox-lambda v2, v0, J
-    # can't use a string, expects a lambda object here. throws ClassCastException.
-:end
-    return-void
-
-    # TODO: refactor testFailures using a goto
-
-:handler
-    const-string v2, "(BoxUnbox) Caught ClassCastException for unbox-lambda"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/ClassCastException; {:start .. :end} :handler
-.end method
-
-
-# Force a GC. Used to ensure our weak reference table of boxed lambdas is getting swept.
-.method private static forceGC()V
-    .registers 1
-    invoke-static {}, Ljava/lang/Runtime;->getRuntime()Ljava/lang/Runtime;
-    move-result-object v0
-    invoke-virtual {v0}, Ljava/lang/Runtime;->gc()V
-
-    return-void
-.end method
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
deleted file mode 100644
index f18b7ff..0000000
--- a/test/955-lambda-smali/smali/CaptureVariables.smali
+++ /dev/null
@@ -1,311 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LCaptureVariables;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Test boolean capture
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test byte capture
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test char capture
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test short capture
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test int capture
-    const v2, 12345678
-    capture-variable v2, "I"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test long capture
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test float capture
-    const v2, infinityf
-    capture-variable v2, "F"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Test double capture
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    #TODO: capture objects and lambdas once we have support for it
-
-    # Test capturing multiple variables
-    invoke-static {}, LCaptureVariables;->testMultipleCaptures()V
-
-    # Test failures
-    invoke-static {}, LCaptureVariables;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_Z(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_B(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V  # no println(B), use char instead.
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_C(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_S(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V  # no println(S), use int instead
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_I(J)V
-    .registers 5 # 1 wide parameter, 3 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_J(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_F(J)V
-    .registers 5 # 1 parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_D(J)V
-    .registers 6 # 1 wide parameter, 4 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is "
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test capturing more than one variable.
-.method private static testMultipleCaptures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v2, 1           # v2 = true
-    capture-variable v2, "Z"
-
-    const v2, 82       # v2 = 82, 'R'
-    capture-variable v2, "B"
-
-    const v2, 0x2202       # v2 = 0x2202, '∂'
-    capture-variable v2, "C"
-
-    const v2, 1000 # v2 = 1000
-    capture-variable v2, "S"
-
-    const v2, 12345678
-    capture-variable v2, "I"
-
-    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
-    capture-variable v2, "J"
-
-    const v2, infinityf
-    capture-variable v2, "F"
-
-    const-wide v2, -infinity
-    capture-variable v2, "D"
-
-    create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-.end method
-
-#TODO: should use a closure type instead of a long
-.method public static printCapturedVariable_ZBCSIJFD(J)V
-    .registers 7 # 1 wide parameter, 5 locals
-
-    const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is "
-    const-string v4, ","
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "Z"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "B"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "C"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "S"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "I"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "J"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "F"
-    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V
-    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    liberate-variable v2, p0, "D"
-    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
-
-    return-void
-.end method
-
-# Test exceptions are thrown as expected when used opcodes incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    liberate-variable v0, v2, "Z" # invoking a null lambda shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "(CaptureVariables) Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
deleted file mode 100644
index 9892d61..0000000
--- a/test/955-lambda-smali/smali/Main.smali
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMain;
-
-.super Ljava/lang/Object;
-
-.method public static main([Ljava/lang/String;)V
-    .registers 2
-
-    invoke-static {}, LSanityCheck;->run()I
-    invoke-static {}, LTrivialHelloWorld;->run()V
-    invoke-static {}, LBoxUnbox;->run()V
-    invoke-static {}, LMoveResult;->run()V
-    invoke-static {}, LCaptureVariables;->run()V
-
-# TODO: add tests when verification fails
-
-    return-void
-.end method
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
deleted file mode 100644
index 52f7ba3..0000000
--- a/test/955-lambda-smali/smali/MoveResult.smali
+++ /dev/null
@@ -1,330 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LMoveResult;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    invoke-static {}, LMoveResult;->testZ()V
-    invoke-static {}, LMoveResult;->testB()V
-    invoke-static {}, LMoveResult;->testS()V
-    invoke-static {}, LMoveResult;->testI()V
-    invoke-static {}, LMoveResult;->testC()V
-    invoke-static {}, LMoveResult;->testJ()V
-    invoke-static {}, LMoveResult;->testF()V
-    invoke-static {}, LMoveResult;->testD()V
-    invoke-static {}, LMoveResult;->testL()V
-
-    return-void
-.end method
-
-# Test that booleans are returned correctly via move-result.
-.method public static testZ()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaZ(J)Z
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 1
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testZ success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testZ failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testZ. Always returns "true".
-.method public static lambdaZ(J)Z
-    .registers 3
-
-    const v0, 1
-    return v0
-
-.end method
-
-# Test that bytes are returned correctly via move-result.
-.method public static testB()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaB(J)B
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 15
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testB success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testB failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testB. Always returns "15".
-.method public static lambdaB(J)B
-    .registers 3 # 1 parameters, 2 locals
-
-    const v0, 15
-    return v0
-
-.end method
-
-# Test that shorts are returned correctly via move-result.
-.method public static testS()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaS(J)S
-    invoke-lambda v0, {}
-    move-result v2
-    const/16 v3, 31000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testS success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testS failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testS. Always returns "31000".
-.method public static lambdaS(J)S
-    .registers 3
-
-    const/16 v0, 31000
-    return v0
-
-.end method
-
-# Test that ints are returned correctly via move-result.
-.method public static testI()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaI(J)I
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 128000
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testI success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testI failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testI. Always returns "128000".
-.method public static lambdaI(J)I
-    .registers 3
-
-    const v0, 128000
-    return v0
-
-.end method
-
-# Test that chars are returned correctly via move-result.
-.method public static testC()V
-    .registers 7
-
-    create-lambda v0, LMoveResult;->lambdaC(J)C
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, 65535
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testC success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testC failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "65535".
-.method public static lambdaC(J)C
-    .registers 3
-
-    const v0, 65535
-    return v0
-
-.end method
-
-# Test that longs are returned correctly via move-result.
-.method public static testJ()V
-    .registers 9
-
-    create-lambda v0, LMoveResult;->lambdaJ(J)J
-    invoke-lambda v0, {}
-    move-result v2
-    const-wide v4, 0xdeadf00dc0ffeeL
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testJ success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testJ failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testC. Always returns "0xdeadf00dc0ffeeL".
-.method public static lambdaJ(J)J
-    .registers 5
-
-    const-wide v0, 0xdeadf00dc0ffeeL
-    return-wide v0
-
-.end method
-
-# Test that floats are returned correctly via move-result.
-.method public static testF()V
-    .registers 6
-
-    create-lambda v0, LMoveResult;->lambdaF(J)F
-    invoke-lambda v0, {}
-    move-result v2
-    const v3, infinityf
-
-    if-ne v3, v2, :is_not_equal
-    const-string v4, "(MoveResult) testF success"
-    goto :end
-
-:is_not_equal
-    const-string v4, "(MoveResult) testF failed"
-
-:end
-    sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v5, v4}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testF. Always returns "infinityf".
-.method public static lambdaF(J)F
-    .registers 4
-
-    const v0, infinityf
-    return v0
-
-.end method
-
-# Test that doubles are returned correctly via move-result.
-.method public static testD()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaD(J)D
-    invoke-lambda v0, {}
-    move-result-wide v2
-    const-wide v4, -infinity
-
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testD success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testD failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testD. Always returns "infinity".
-.method public static lambdaD(J)D
-    .registers 5
-
-    const-wide v0, -infinity
-    return-wide v0
-
-.end method
-
-
-# Test that objects are returned correctly via move-result.
-.method public static testL()V
-    .registers 8
-
-    create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String;
-    invoke-lambda v0, {}
-    move-result-object v2
-    const-string v4, "Interned string"
-
-    # relies on string interning returning identical object references
-    if-ne v4, v2, :is_not_equal
-    const-string v6, "(MoveResult) testL success"
-    goto :end
-
-:is_not_equal
-    const-string v6, "(MoveResult) testL failed"
-
-:end
-    sget-object v7, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v7, v6}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    return-void
-
-.end method
-
-# Lambda target for testL. Always returns "Interned string" (string).
-.method public static lambdaL(J)Ljava/lang/String;
-    .registers 5
-
-    const-string v0, "Interned string"
-    return-object v0
-
-.end method
-
-
diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali
deleted file mode 100644
index 4c807d7..0000000
--- a/test/955-lambda-smali/smali/SanityCheck.smali
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LSanityCheck;
-.super Ljava/lang/Object;
-
-
-.method public constructor <init>()V
-.registers 1
-   invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-   return-void
-.end method
-
-# This test is just here to make sure that we can at least execute basic non-lambda
-# functionality such as printing (when lambdas are enabled in the runtime).
-.method public static run()I
-# Don't use too many registers here to avoid hitting the Stack::SanityCheck frame<2KB assert
-.registers 3
-    const-string v0, "SanityCheck"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-    const v2, 123456
-    return v2
-.end method
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
deleted file mode 100644
index 3444b13..0000000
--- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali
+++ /dev/null
@@ -1,94 +0,0 @@
-#
-#  Copyright (C) 2015 The Android Open Source Project
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-.class public LTrivialHelloWorld;
-.super Ljava/lang/Object;
-
-.method public constructor <init>()V
-.registers 1
-    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
-    return-void
-.end method
-
-.method public static run()V
-.registers 8
-    # Trivial 0-arg hello world
-    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V
-    # TODO: create-lambda should not write to both v0 and v1
-    invoke-lambda v0, {}
-
-    # Slightly more interesting 4-arg hello world
-    create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    # TODO: create-lambda should not write to both v2 and v3
-    const-string v4, "A"
-    const-string v5, "B"
-    const-string v6, "C"
-    const-string v7, "D"
-    invoke-lambda v2, {v4, v5, v6, v7}
-
-    invoke-static {}, LTrivialHelloWorld;->testFailures()V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorld(J)V
-    .registers 5 # 1 wide parameters, 3 locals
-
-    const-string v0, "Hello world! (0-args, no closure)"
-
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-#TODO: should use a closure type instead of jlong. 
-.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals
-
-    const-string v0, " Hello world! (4-args, no closure)"
-    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
-
-    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-    invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
-
-    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-.end method
-
-# Test exceptions are thrown as expected when used opcodes incorrectly
-.method private static testFailures()V
-    .registers 4 # 0 parameters, 4 locals
-
-    const v0, 0  # v0 = null
-    const v1, 0  # v1 = null
-:start
-    invoke-lambda v0, {}  # invoking a null lambda shall raise an NPE
-:end
-    return-void
-
-:handler
-    const-string v2, "Caught NPE"
-    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
-
-    return-void
-
-    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
-.end method
diff --git a/test/Android.libartagent.mk b/test/Android.libartagent.mk
new file mode 100644
index 0000000..729de3f
--- /dev/null
+++ b/test/Android.libartagent.mk
@@ -0,0 +1,101 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBARTAGENT_COMMON_SRC_FILES := \
+    900-hello-plugin/load_unload.cc
+
+# $(1): target or host
+# $(2): debug or <empty>
+define build-libartagent
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),debug)
+    ifneq ($(2),)
+      $$(error expected debug or empty for argument 2, received $(2))
+    endif
+    suffix := d
+  else
+    suffix :=
+  endif
+
+  art_target_or_host := $(1)
+
+  include $(CLEAR_VARS)
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  LOCAL_MODULE := libartagent$$(suffix)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_MODULE_TAGS := tests
+  endif
+  LOCAL_SRC_FILES := $(LIBARTAGENT_COMMON_SRC_FILES)
+  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libartagent.mk
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
+    LOCAL_SHARED_LIBRARIES += libdl
+    LOCAL_MULTILIB := both
+    LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64)
+    LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_IS_HOST_MODULE := true
+    LOCAL_MULTILIB := both
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+
+  # Clear locally used variables.
+  art_target_or_host :=
+  suffix :=
+endef
+
+ifeq ($(ART_BUILD_TARGET),true)
+  $(eval $(call build-libartagent,target,))
+  $(eval $(call build-libartagent,target,debug))
+endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libartagent,host,))
+  $(eval $(call build-libartagent,host,debug))
+endif
+
+# Clear locally used variables.
+LOCAL_PATH :=
+LIBARTAGENT_COMMON_SRC_FILES :=
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 5b1af27..ec5b7d2 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -25,6 +25,7 @@
   004-SignalTest/signaltest.cc \
   004-ReferenceMap/stack_walk_refmap_jni.cc \
   004-StackWalk/stack_walk_jni.cc \
+  004-ThreadStress/thread_stress.cc \
   004-UnsafeTest/unsafe_test.cc \
   044-proxy/native_proxy.cc \
   051-thread/thread_test.cc \
@@ -35,6 +36,7 @@
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
   148-multithread-gc-annotations/gc_coverage.cc \
+  149-suspend-all-stress/suspend_all.cc \
   454-get-vreg/get_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
@@ -44,6 +46,7 @@
   566-polymorphic-inlining/polymorphic_inline.cc \
   570-checker-osr/osr.cc \
   595-profile-saving/profile-saving.cc \
+  596-app-images/app_images.cc \
   597-deopt-new-string/deopt.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
@@ -83,8 +86,12 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libarttest.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
-    $(call set-target-local-cflags-vars,debug)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
@@ -94,13 +101,15 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     ifeq ($$(suffix),d)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_LDLIBS := -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk
index e8cc7e4..5c97e4d 100644
--- a/test/Android.libnativebridgetest.mk
+++ b/test/Android.libnativebridgetest.mk
@@ -48,7 +48,7 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libnativebridgetest.mk
   ifeq ($$(art_target_or_host),target)
-    $(call set-target-local-clang-vars)
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
     $(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_STATIC_LIBRARIES := libgtest
@@ -60,9 +60,9 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES := libcutils
-    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_LDLIBS := -ldl -lpthread
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
diff --git a/test/Android.libtiagent.mk b/test/Android.libtiagent.mk
new file mode 100644
index 0000000..626dc3b
--- /dev/null
+++ b/test/Android.libtiagent.mk
@@ -0,0 +1,102 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBARTAGENT_COMMON_SRC_FILES := \
+    ti-agent/common_load.cc \
+    901-hello-ti-agent/basics.cc
+
+# $(1): target or host
+# $(2): debug or <empty>
+define build-libtiagent
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),debug)
+    ifneq ($(2),)
+      $$(error expected debug or empty for argument 2, received $(2))
+    endif
+    suffix := d
+  else
+    suffix :=
+  endif
+
+  art_target_or_host := $(1)
+
+  include $(CLEAR_VARS)
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  LOCAL_MODULE := libtiagent$$(suffix)
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_MODULE_TAGS := tests
+  endif
+  LOCAL_SRC_FILES := $(LIBARTAGENT_COMMON_SRC_FILES)
+  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper libopenjdkjvmti$$(suffix)
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime art/test
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libtiagent.mk
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
+    LOCAL_SHARED_LIBRARIES += libdl
+    LOCAL_MULTILIB := both
+    LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64)
+    LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
+    endif
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_IS_HOST_MODULE := true
+    LOCAL_MULTILIB := both
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+
+  # Clear locally used variables.
+  art_target_or_host :=
+  suffix :=
+endef
+
+ifeq ($(ART_BUILD_TARGET),true)
+  $(eval $(call build-libtiagent,target,))
+  $(eval $(call build-libtiagent,target,debug))
+endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libtiagent,host,))
+  $(eval $(call build-libtiagent,host,debug))
+endif
+
+# Clear locally used variables.
+LOCAL_PATH :=
+LIBARTAGENT_COMMON_SRC_FILES :=
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index b690f63..75c4f34 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -26,7 +26,8 @@
 
 # The path where build only targets will be output, e.g.
 # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
-art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
+art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA
+art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
 
 # A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is
 # an empty file touched in the intermediate directory.
@@ -49,7 +50,8 @@
 # Helper to create individual build targets for tests. Must be called with $(eval).
 # $(1): the test number
 define define-build-art-run-test
-  dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
+  dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch
+  dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch
   run_test_options = --build-only
   ifeq ($(ART_TEST_QUIET),true)
     run_test_options += --quiet
@@ -67,8 +69,13 @@
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
 
-  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_target)
+$$(dmart_install_target): $$(dmart_target)
+	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
+	$(hide) cp $$(dir $$<)/* $$(dir $$@)/
+
+  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target)
   dmart_target :=
+  dmart_install_target :=
   run_test_options :=
 endef
 $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test))))
@@ -78,12 +85,13 @@
 LOCAL_MODULE := art-run-tests
 LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES)
 # The build system use this flag to pick up files generated by declare-make-art-run-test.
-LOCAL_PICKUP_FILES := $(art_run_tests_dir)
+LOCAL_PICKUP_FILES := $(art_run_tests_install_dir)
 
 include $(BUILD_PHONY_PACKAGE)
 
 # Clear temp vars.
-art_run_tests_dir :=
+art_run_tests_build_dir :=
+art_run_tests_install_dir :=
 define-build-art-run-test :=
 TEST_ART_RUN_TEST_BUILD_RULES :=
 
@@ -111,8 +119,14 @@
 ifeq ($(ART_TEST_JIT),true)
   COMPILER_TYPES += jit
 endif
+OPTIMIZING_COMPILER_TYPES :=
 ifeq ($(ART_TEST_OPTIMIZING),true)
   COMPILER_TYPES += optimizing
+  OPTIMIZING_COMPILER_TYPES += optimizing
+endif
+ifeq ($(ART_TEST_OPTIMIZING_GRAPH_COLOR),true)
+  COMPILER_TYPES += regalloc_gc
+  OPTIMIZING_COMPILER_TYPES += regalloc_gc
 endif
 RELOCATE_TYPES := relocate
 ifeq ($(ART_TEST_RUN_TEST_NO_RELOCATE),true)
@@ -209,10 +223,13 @@
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
 
 
-# Disable 137-cfi (b/27391690).
+# Disable 149-suspend-all-stress, as its output is flaky (b/28988206).
 # Disable 577-profile-foreign-dex (b/27454772).
+# Disable 552-checker-sharpening until the compiler component of the new string dex cache is added (@cwadsworth, @vmarko)
 TEST_ART_BROKEN_ALL_TARGET_TESTS := \
+  149-suspend-all-stress \
   577-profile-foreign-dex \
+  552-checker-sharpening \
 
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
     $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
@@ -467,14 +484,28 @@
 
 TEST_ART_BROKEN_JIT_RUN_TESTS :=
 
+# Known broken tests for the graph coloring register allocator.
+# These tests were based on the linear scan allocator, which makes different decisions than
+# the graph coloring allocator. (These attempt to test for code quality, not correctness.)
+TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR := \
+  570-checker-select \
+  484-checker-register-hints
+
+ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      regalloc_gc,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
+endif
+
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
     510-checker-try-catch \
 
 ifeq (mips,$(TARGET_ARCH))
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -486,9 +517,9 @@
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
 
 ifeq (mips64,$(TARGET_ARCH))
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -501,9 +532,9 @@
   454-get-vreg \
   457-regs \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -512,9 +543,9 @@
 # Tests that should fail when the optimizing compiler compiles them debuggable.
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
       $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
@@ -526,18 +557,27 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
 #      instruction yet (b/26601270).
-# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
-#      not yet handled in the read barrier configuration.
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
   484-checker-register-hints \
-  527-checker-array-access-split \
-  537-checker-arraycopy
+  527-checker-array-access-split
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
+# Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT).
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in non-Baker read barrier configurations.
+TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \
+  537-checker-arraycopy
+
+# Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler).
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in non-Baker read barrier configurations.
+TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \
+  537-checker-arraycopy
+
 ifeq ($(ART_USE_READ_BARRIER),true)
   ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
@@ -546,11 +586,17 @@
         $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
 
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
+        $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
+      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+          $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
+          $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+          $(TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    endif
   endif
 
   ifneq (,$(filter jit,$(COMPILER_TYPES)))
@@ -558,21 +604,34 @@
         $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
         $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
+      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+          $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+          $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+          $(TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+    endif
   endif
 endif
 
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
+TEST_ART_BROKEN_NPIC_RUN_TESTS := 596-app-images
+ifneq (,$(filter npictest,$(PICTEST_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      ${COMPILER_TYPES},$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),npictest,$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_NPIC_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
 # Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
 # 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
   055-enum-performance
 
 ifeq ($(ART_HEAP_POISONING),true)
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
@@ -620,6 +679,22 @@
 # only once).
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUTS)
 
+# Also need libartagent.
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagentd.so
+ifdef TARGET_2ND_ARCH
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagentd.so
+endif
+
+# Also need libtiagent.
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libtiagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libtiagentd.so
+ifdef TARGET_2ND_ARCH
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libtiagent.so
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libtiagentd.so
+endif
+
 # Also need libarttest.
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
@@ -638,6 +713,10 @@
 # specific version depending on the compiler.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libtiagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libtiagentd$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
@@ -647,6 +726,10 @@
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libtiagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libtiagentd$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
@@ -719,6 +802,9 @@
   ifeq ($(4),optimizing)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES
     run_test_options += --optimizing
+  else ifeq ($(4),regalloc_gc)
+    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_GRAPH_COLOR_RULES
+    run_test_options += --optimizing -Xcompiler-option --register-allocation-strategy=graph-color
   else
     ifeq ($(4),interpreter)
       test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
@@ -802,6 +888,10 @@
     endif
   endif
   image_suffix := $(4)
+  ifeq ($(4),regalloc_gc)
+    # Graph coloring tests share the image_suffix with optimizing tests.
+    image_suffix := optimizing
+  endif
   ifeq ($(9),no-image)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
     run_test_options += --no-image
@@ -1052,5 +1142,10 @@
 RUN_TYPES :=
 DEBUGGABLE_TYPES :=
 
-include $(LOCAL_PATH)/Android.libarttest.mk
-include art/test/Android.libnativebridgetest.mk
+MY_LOCAL_PATH := $(LOCAL_PATH)
+include $(MY_LOCAL_PATH)/Android.libartagent.mk
+include $(MY_LOCAL_PATH)/Android.libtiagent.mk
+include $(MY_LOCAL_PATH)/Android.libarttest.mk
+include $(MY_LOCAL_PATH)/Android.libnativebridgetest.mk
+MY_LOCAL_PATH :=
+LOCAL_PATH :=
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 19c13f7..45cfd0f 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import dalvik.annotation.optimization.FastNative;
+
 class MyClassNatives {
     native void throwException();
     native void foo();
@@ -35,11 +37,11 @@
     static native int getText(long val1, Object obj1, long val2, Object obj2);
     synchronized native Object []getSinkPropertiesNative(String path);
 
-    native Class instanceMethodThatShouldReturnClass();
-    static native Class staticMethodThatShouldReturnClass();
+    native Class<?> instanceMethodThatShouldReturnClass();
+    static native Class<?> staticMethodThatShouldReturnClass();
 
-    native void instanceMethodThatShouldTakeClass(int i, Class c);
-    static native void staticMethodThatShouldTakeClass(int i, Class c);
+    native void instanceMethodThatShouldTakeClass(int i, Class<?> c);
+    static native void staticMethodThatShouldTakeClass(int i, Class<?> c);
 
     native float checkFloats(float f1, float f2);
     native void forceStackParameters(int i1, int i2, int i3, int i4, int i5, int i6, int i8, int i9,
@@ -102,4 +104,9 @@
     static native boolean returnTrue();
     static native boolean returnFalse();
     static native int returnInt();
+
+    // Check for @FastNative annotation presence [or absence].
+    public static native void normalNative();
+    @FastNative
+    public static native void fastNative();
 }
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index e70a95c..ee2ee1a 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -16,6 +16,7 @@
 
 #include "jni.h"
 
+#include "base/enums.h"
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "jit/jit.h"
@@ -129,18 +130,18 @@
     return;
   }
 
-  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
 
-  ScopedUtfChars chars(env, method_name);
-  CHECK(chars.c_str() != nullptr);
-
-  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
-  ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
+    ScopedUtfChars chars(env, method_name);
+    CHECK(chars.c_str() != nullptr);
+    method = soa.Decode<mirror::Class*>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  }
 
   jit::JitCodeCache* code_cache = jit->GetCodeCache();
   OatQuickMethodHeader* header = nullptr;
-  // Make sure there is a profiling info, required by the compiler.
-  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
   while (true) {
     header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
     if (code_cache->ContainsPc(header->GetCode())) {
@@ -148,6 +149,9 @@
     } else {
       // Sleep to yield to the compiler thread.
       usleep(1000);
+      ScopedObjectAccess soa(Thread::Current());
+      // Make sure there is a profiling info, required by the compiler.
+      ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
       // Will either ensure it's compiled or do the compilation itself.
       jit->CompileMethod(method, soa.Self(), /* osr */ false);
     }
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index 922eae6..85ea1c8 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -37,17 +37,20 @@
   asserts_enabled = false;
 }
 
-
-// public static native boolean isInterpreted();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) {
+static jboolean IsInterpreted(JNIEnv* env, jclass, size_t level) {
   ScopedObjectAccess soa(env);
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
+// public static native boolean isInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 1);
+}
+
 // public static native void assertIsInterpreted();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) {
@@ -56,10 +59,7 @@
   }
 }
 
-
-// public static native boolean isManaged();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+static jboolean IsManaged(JNIEnv* env, jclass cls, size_t level) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -71,13 +71,19 @@
     return JNI_FALSE;
   }
 
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
 
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
+// public static native boolean isManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 1);
+}
+
 // public static native void assertIsManaged();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) {
@@ -86,4 +92,32 @@
   }
 }
 
+// public static native boolean isCallerInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 2);
+}
+
+// public static native void assertCallerIsInterpreted();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerInterpreted(env, klass));
+  }
+}
+
+// public static native boolean isCallerManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 2);
+}
+
+// public static native void assertCallerIsManaged();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerManaged(env, cls));
+  }
+}
+
 }  // namespace art
diff --git a/test/dexdump/all.dex b/test/dexdump/all.dex
new file mode 100644
index 0000000..caf678d
--- /dev/null
+++ b/test/dexdump/all.dex
Binary files differ
diff --git a/test/dexdump/all.lst b/test/dexdump/all.lst
new file mode 100644
index 0000000..17ab9ca
--- /dev/null
+++ b/test/dexdump/all.lst
@@ -0,0 +1,21 @@
+#all.dex
+0x0000043c 8 A <init> ()V (none) -1
+0x00000454 58 A arrays ()V (none) -1
+0x000004a0 130 A binary_ops ()V (none) -1
+0x00000534 66 A binary_ops_2addr ()V (none) -1
+0x00000588 34 A binary_ops_lit16 ()V (none) -1
+0x000005bc 46 A binary_ops_lit8 ()V (none) -1
+0x000005fc 22 A compares ()V (none) -1
+0x00000624 50 A conditionals ()V (none) -1
+0x00000668 56 A constants ()V (none) -1
+0x000006b0 108 A misc ()V (none) -1
+0x0000072c 46 A moves ()V (none) -1
+0x0000076c 32 A packed_switch ()V (none) -1
+0x0000079c 2 A return32 ()I (none) -1
+0x000007b0 2 A return64 ()I (none) -1
+0x000007c4 2 A return_object ()Ljava/lang/Object; (none) -1
+0x000007d8 44 A sparse_switch ()V (none) -1
+0x00000814 58 A static_fields ()V (none) -1
+0x00000860 44 A unary_ops ()V (none) -1
+0x0000089c 58 A instance_fields ()V (none) -1
+0x000008e8 30 A invokes ()V (none) -1
diff --git a/test/dexdump/all.txt b/test/dexdump/all.txt
new file mode 100644
index 0000000..af4fb4c
--- /dev/null
+++ b/test/dexdump/all.txt
@@ -0,0 +1,622 @@
+Processing 'all.dex'...
+Opened 'all.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : d5134208
+signature           : 7af6...100f
+file_size           : 2572
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 46
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 10
+type_ids_off        : 296 (0x000128)
+proto_ids_size      : 3
+proto_ids_off       : 336 (0x000150)
+field_ids_size      : 14
+field_ids_off       : 372 (0x000174)
+method_ids_size     : 21
+method_ids_off      : 484 (0x0001e4)
+class_defs_size     : 1
+class_defs_off      : 652 (0x00028c)
+data_size           : 1888
+data_off            : 684 (0x0002ac)
+
+Class #0 header:
+class_idx           : 4
+access_flags        : 1 (0x0001)
+superclass_idx      : 5
+interfaces_off      : 0 (0x000000)
+source_file_idx     : -1
+annotations_off     : 0 (0x000000)
+class_data_off      : 2310 (0x000906)
+static_fields_size  : 7
+instance_fields_size: 7
+direct_methods_size : 18
+virtual_methods_size: 2
+
+Class #0            -
+  Class descriptor  : 'LA;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LA;)
+      name          : 'sB'
+      type          : 'B'
+      access        : 0x000a (PRIVATE STATIC)
+    #1              : (in LA;)
+      name          : 'sC'
+      type          : 'C'
+      access        : 0x000a (PRIVATE STATIC)
+    #2              : (in LA;)
+      name          : 'sI'
+      type          : 'I'
+      access        : 0x000a (PRIVATE STATIC)
+    #3              : (in LA;)
+      name          : 'sJ'
+      type          : 'J'
+      access        : 0x000a (PRIVATE STATIC)
+    #4              : (in LA;)
+      name          : 'sO'
+      type          : 'LA;'
+      access        : 0x000a (PRIVATE STATIC)
+    #5              : (in LA;)
+      name          : 'sS'
+      type          : 'S'
+      access        : 0x000a (PRIVATE STATIC)
+    #6              : (in LA;)
+      name          : 'sZ'
+      type          : 'Z'
+      access        : 0x000a (PRIVATE STATIC)
+  Instance fields   -
+    #0              : (in LA;)
+      name          : 'mB'
+      type          : 'B'
+      access        : 0x0002 (PRIVATE)
+    #1              : (in LA;)
+      name          : 'mC'
+      type          : 'C'
+      access        : 0x0002 (PRIVATE)
+    #2              : (in LA;)
+      name          : 'mI'
+      type          : 'I'
+      access        : 0x0002 (PRIVATE)
+    #3              : (in LA;)
+      name          : 'mJ'
+      type          : 'J'
+      access        : 0x0002 (PRIVATE)
+    #4              : (in LA;)
+      name          : 'mO'
+      type          : 'LA;'
+      access        : 0x0002 (PRIVATE)
+    #5              : (in LA;)
+      name          : 'mS'
+      type          : 'S'
+      access        : 0x0002 (PRIVATE)
+    #6              : (in LA;)
+      name          : 'mZ'
+      type          : 'Z'
+      access        : 0x0002 (PRIVATE)
+  Direct methods    -
+    #0              : (in LA;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+00042c:                                        |[00042c] A.<init>:()V
+00043c: 7010 1400 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0014
+000442: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'arrays'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000444:                                        |[000444] A.arrays:()V
+000454: 4400 0102                              |0000: aget v0, v1, v2
+000458: 4500 0102                              |0002: aget-wide v0, v1, v2
+00045c: 4600 0102                              |0004: aget-object v0, v1, v2
+000460: 4700 0102                              |0006: aget-boolean v0, v1, v2
+000464: 4800 0102                              |0008: aget-byte v0, v1, v2
+000468: 4900 0102                              |000a: aget-char v0, v1, v2
+00046c: 4a00 0102                              |000c: aget-short v0, v1, v2
+000470: 4b00 0102                              |000e: aput v0, v1, v2
+000474: 4c00 0102                              |0010: aput-wide v0, v1, v2
+000478: 4d00 0102                              |0012: aput-object v0, v1, v2
+00047c: 4e00 0102                              |0014: aput-boolean v0, v1, v2
+000480: 4f00 0102                              |0016: aput-byte v0, v1, v2
+000484: 5000 0102                              |0018: aput-char v0, v1, v2
+000488: 5100 0102                              |001a: aput-short v0, v1, v2
+00048c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #2              : (in LA;)
+      name          : 'binary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 65 16-bit code units
+000490:                                        |[000490] A.binary_ops:()V
+0004a0: 9000 0102                              |0000: add-int v0, v1, v2
+0004a4: 9100 0102                              |0002: sub-int v0, v1, v2
+0004a8: 9200 0102                              |0004: mul-int v0, v1, v2
+0004ac: 9300 0102                              |0006: div-int v0, v1, v2
+0004b0: 9400 0102                              |0008: rem-int v0, v1, v2
+0004b4: 9500 0102                              |000a: and-int v0, v1, v2
+0004b8: 9600 0102                              |000c: or-int v0, v1, v2
+0004bc: 9700 0102                              |000e: xor-int v0, v1, v2
+0004c0: 9800 0102                              |0010: shl-int v0, v1, v2
+0004c4: 9900 0102                              |0012: shr-int v0, v1, v2
+0004c8: 9a00 0102                              |0014: ushr-int v0, v1, v2
+0004cc: 9b00 0102                              |0016: add-long v0, v1, v2
+0004d0: 9c00 0102                              |0018: sub-long v0, v1, v2
+0004d4: 9d00 0102                              |001a: mul-long v0, v1, v2
+0004d8: 9e00 0102                              |001c: div-long v0, v1, v2
+0004dc: 9f00 0102                              |001e: rem-long v0, v1, v2
+0004e0: a000 0102                              |0020: and-long v0, v1, v2
+0004e4: a100 0102                              |0022: or-long v0, v1, v2
+0004e8: a200 0102                              |0024: xor-long v0, v1, v2
+0004ec: a300 0102                              |0026: shl-long v0, v1, v2
+0004f0: a400 0102                              |0028: shr-long v0, v1, v2
+0004f4: a500 0102                              |002a: ushr-long v0, v1, v2
+0004f8: a600 0102                              |002c: add-float v0, v1, v2
+0004fc: a700 0102                              |002e: sub-float v0, v1, v2
+000500: a800 0102                              |0030: mul-float v0, v1, v2
+000504: a900 0102                              |0032: div-float v0, v1, v2
+000508: aa00 0102                              |0034: rem-float v0, v1, v2
+00050c: ab00 0102                              |0036: add-double v0, v1, v2
+000510: ac00 0102                              |0038: sub-double v0, v1, v2
+000514: ad00 0102                              |003a: mul-double v0, v1, v2
+000518: ae00 0102                              |003c: div-double v0, v1, v2
+00051c: af00 0102                              |003e: rem-double v0, v1, v2
+000520: 0e00                                   |0040: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #3              : (in LA;)
+      name          : 'binary_ops_2addr'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 33 16-bit code units
+000524:                                        |[000524] A.binary_ops_2addr:()V
+000534: b010                                   |0000: add-int/2addr v0, v1
+000536: b110                                   |0001: sub-int/2addr v0, v1
+000538: b210                                   |0002: mul-int/2addr v0, v1
+00053a: b310                                   |0003: div-int/2addr v0, v1
+00053c: b410                                   |0004: rem-int/2addr v0, v1
+00053e: b510                                   |0005: and-int/2addr v0, v1
+000540: b610                                   |0006: or-int/2addr v0, v1
+000542: b710                                   |0007: xor-int/2addr v0, v1
+000544: b810                                   |0008: shl-int/2addr v0, v1
+000546: b910                                   |0009: shr-int/2addr v0, v1
+000548: ba10                                   |000a: ushr-int/2addr v0, v1
+00054a: bb10                                   |000b: add-long/2addr v0, v1
+00054c: bc10                                   |000c: sub-long/2addr v0, v1
+00054e: bd10                                   |000d: mul-long/2addr v0, v1
+000550: be10                                   |000e: div-long/2addr v0, v1
+000552: bf10                                   |000f: rem-long/2addr v0, v1
+000554: c010                                   |0010: and-long/2addr v0, v1
+000556: c110                                   |0011: or-long/2addr v0, v1
+000558: c210                                   |0012: xor-long/2addr v0, v1
+00055a: c310                                   |0013: shl-long/2addr v0, v1
+00055c: c410                                   |0014: shr-long/2addr v0, v1
+00055e: c510                                   |0015: ushr-long/2addr v0, v1
+000560: c610                                   |0016: add-float/2addr v0, v1
+000562: c710                                   |0017: sub-float/2addr v0, v1
+000564: c810                                   |0018: mul-float/2addr v0, v1
+000566: c910                                   |0019: div-float/2addr v0, v1
+000568: ca10                                   |001a: rem-float/2addr v0, v1
+00056a: cb10                                   |001b: add-double/2addr v0, v1
+00056c: cc10                                   |001c: sub-double/2addr v0, v1
+00056e: cd10                                   |001d: mul-double/2addr v0, v1
+000570: ce10                                   |001e: div-double/2addr v0, v1
+000572: cf10                                   |001f: rem-double/2addr v0, v1
+000574: 0e00                                   |0020: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #4              : (in LA;)
+      name          : 'binary_ops_lit16'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 17 16-bit code units
+000578:                                        |[000578] A.binary_ops_lit16:()V
+000588: d010 3412                              |0000: add-int/lit16 v0, v1, #int 4660 // #1234
+00058c: d110 3412                              |0002: rsub-int v0, v1, #int 4660 // #1234
+000590: d210 3412                              |0004: mul-int/lit16 v0, v1, #int 4660 // #1234
+000594: d310 3412                              |0006: div-int/lit16 v0, v1, #int 4660 // #1234
+000598: d410 3412                              |0008: rem-int/lit16 v0, v1, #int 4660 // #1234
+00059c: d510 3412                              |000a: and-int/lit16 v0, v1, #int 4660 // #1234
+0005a0: d610 3412                              |000c: or-int/lit16 v0, v1, #int 4660 // #1234
+0005a4: d710 3412                              |000e: xor-int/lit16 v0, v1, #int 4660 // #1234
+0005a8: 0e00                                   |0010: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #5              : (in LA;)
+      name          : 'binary_ops_lit8'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+0005ac:                                        |[0005ac] A.binary_ops_lit8:()V
+0005bc: d800 0112                              |0000: add-int/lit8 v0, v1, #int 18 // #12
+0005c0: d900 0112                              |0002: rsub-int/lit8 v0, v1, #int 18 // #12
+0005c4: da00 0112                              |0004: mul-int/lit8 v0, v1, #int 18 // #12
+0005c8: db00 0112                              |0006: div-int/lit8 v0, v1, #int 18 // #12
+0005cc: dc00 0112                              |0008: rem-int/lit8 v0, v1, #int 18 // #12
+0005d0: dd00 0112                              |000a: and-int/lit8 v0, v1, #int 18 // #12
+0005d4: de00 0112                              |000c: or-int/lit8 v0, v1, #int 18 // #12
+0005d8: df00 0112                              |000e: xor-int/lit8 v0, v1, #int 18 // #12
+0005dc: e000 0112                              |0010: shl-int/lit8 v0, v1, #int 18 // #12
+0005e0: e100 0112                              |0012: shr-int/lit8 v0, v1, #int 18 // #12
+0005e4: e200 0112                              |0014: ushr-int/lit8 v0, v1, #int 18 // #12
+0005e8: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #6              : (in LA;)
+      name          : 'compares'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 11 16-bit code units
+0005ec:                                        |[0005ec] A.compares:()V
+0005fc: 2d00 0102                              |0000: cmpl-float v0, v1, v2
+000600: 2e00 0102                              |0002: cmpg-float v0, v1, v2
+000604: 2f00 0102                              |0004: cmpl-double v0, v1, v2
+000608: 3000 0102                              |0006: cmpg-double v0, v1, v2
+00060c: 3100 0102                              |0008: cmp-long v0, v1, v2
+000610: 0e00                                   |000a: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #7              : (in LA;)
+      name          : 'conditionals'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 25 16-bit code units
+000614:                                        |[000614] A.conditionals:()V
+000624: 3210 1800                              |0000: if-eq v0, v1, 0018 // +0018
+000628: 3310 1600                              |0002: if-ne v0, v1, 0018 // +0016
+00062c: 3410 1400                              |0004: if-lt v0, v1, 0018 // +0014
+000630: 3510 1200                              |0006: if-ge v0, v1, 0018 // +0012
+000634: 3610 1000                              |0008: if-gt v0, v1, 0018 // +0010
+000638: 3710 0e00                              |000a: if-le v0, v1, 0018 // +000e
+00063c: 3800 0c00                              |000c: if-eqz v0, 0018 // +000c
+000640: 3900 0a00                              |000e: if-nez v0, 0018 // +000a
+000644: 3a00 0800                              |0010: if-ltz v0, 0018 // +0008
+000648: 3b00 0600                              |0012: if-gez v0, 0018 // +0006
+00064c: 3c00 0400                              |0014: if-gtz v0, 0018 // +0004
+000650: 3d00 0200                              |0016: if-lez v0, 0018 // +0002
+000654: 0e00                                   |0018: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #8              : (in LA;)
+      name          : 'constants'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 28 16-bit code units
+000658:                                        |[000658] A.constants:()V
+000668: 1210                                   |0000: const/4 v0, #int 1 // #1
+00066a: 1300 3412                              |0001: const/16 v0, #int 4660 // #1234
+00066e: 1400 7856 3412                         |0003: const v0, #float 5.69046e-28 // #12345678
+000674: 1500 3412                              |0006: const/high16 v0, #int 305397760 // #1234
+000678: 1600 3412                              |0008: const-wide/16 v0, #int 4660 // #1234
+00067c: 1700 7856 3412                         |000a: const-wide/32 v0, #float 5.69046e-28 // #12345678
+000682: 1800 efcd ab90 7856 3412               |000d: const-wide v0, #double 5.62635e-221 // #1234567890abcdef
+00068c: 1900 3412                              |0012: const-wide/high16 v0, #long 1311673391471656960 // #1234
+000690: 1a00 2c00                              |0014: const-string v0, "string" // string@002c
+000694: 1b00 2c00 0000                         |0016: const-string/jumbo v0, "string" // string@0000002c
+00069a: 1c00 0500                              |0019: const-class v0, Ljava/lang/Object; // type@0005
+00069e: 0e00                                   |001b: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #9              : (in LA;)
+      name          : 'misc'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 5
+      ins           : 0
+      outs          : 0
+      insns size    : 54 16-bit code units
+0006a0:                                        |[0006a0] A.misc:()V
+0006b0: 0000                                   |0000: nop // spacer
+0006b2: 1d00                                   |0001: monitor-enter v0
+0006b4: 1e00                                   |0002: monitor-exit v0
+0006b6: 1f00 0500                              |0003: check-cast v0, Ljava/lang/Object; // type@0005
+0006ba: 2010 0500                              |0005: instance-of v0, v1, Ljava/lang/Object; // type@0005
+0006be: 2110                                   |0007: array-length v0, v1
+0006c0: 2200 0500                              |0008: new-instance v0, Ljava/lang/Object; // type@0005
+0006c4: 2310 0500                              |000a: new-array v0, v1, Ljava/lang/Object; // type@0005
+0006c8: 2454 0900 1032                         |000c: filled-new-array {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006ce: 2505 0900 0000                         |000f: filled-new-array/range {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006d4: 2600 0c00 0000                         |0012: fill-array-data v0, 0000001e // +0000000c
+0006da: 2700                                   |0015: throw v0
+0006dc: 2806                                   |0016: goto 001c // +0006
+0006de: 2900 0500                              |0017: goto/16 001c // +0005
+0006e2: 2a00 0300 0000                         |0019: goto/32 #00000003
+0006e8: 0e00                                   |001c: return-void
+0006ea: 0000                                   |001d: nop // spacer
+0006ec: 0003 0400 0a00 0000 0100 0000 0200 ... |001e: array-data (24 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #10              : (in LA;)
+      name          : 'moves'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+00071c:                                        |[00071c] A.moves:()V
+00072c: 0110                                   |0000: move v0, v1
+00072e: 0200 0100                              |0001: move/from16 v0, v1
+000732: 0300 0000 0100                         |0003: move/16 v0, v1
+000738: 0410                                   |0006: move-wide v0, v1
+00073a: 0500 0100                              |0007: move-wide/from16 v0, v1
+00073e: 0600 0000 0100                         |0009: move-wide/16 v0, v1
+000744: 0710                                   |000c: move-object v0, v1
+000746: 0800 0100                              |000d: move-object/from16 v0, v1
+00074a: 0900 0000 0100                         |000f: move-object/16 v0, v1
+000750: 0a00                                   |0012: move-result v0
+000752: 0b00                                   |0013: move-result-wide v0
+000754: 0c00                                   |0014: move-result-object v0
+000756: 0d00                                   |0015: move-exception v0
+000758: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #11              : (in LA;)
+      name          : 'packed_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 16 16-bit code units
+00075c:                                        |[00075c] A.packed_switch:()V
+00076c: 2b00 0800 0000                         |0000: packed-switch v0, 00000008 // +00000008
+000772: 0e00                                   |0003: return-void
+000774: 28ff                                   |0004: goto 0003 // -0001
+000776: 28fe                                   |0005: goto 0003 // -0002
+000778: 28fd                                   |0006: goto 0003 // -0003
+00077a: 0000                                   |0007: nop // spacer
+00077c: 0001 0200 feff ff7f 0500 0000 0600 ... |0008: packed-switch-data (8 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #12              : (in LA;)
+      name          : 'return32'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+00078c:                                        |[00078c] A.return32:()I
+00079c: 0f00                                   |0000: return v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #13              : (in LA;)
+      name          : 'return64'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007a0:                                        |[0007a0] A.return64:()I
+0007b0: 1000                                   |0000: return-wide v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #14              : (in LA;)
+      name          : 'return_object'
+      type          : '()Ljava/lang/Object;'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007b4:                                        |[0007b4] A.return_object:()Ljava/lang/Object;
+0007c4: 1100                                   |0000: return-object v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #15              : (in LA;)
+      name          : 'sparse_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+0007c8:                                        |[0007c8] A.sparse_switch:()V
+0007d8: 2c00 0400 0000                         |0000: sparse-switch v0, 00000004 // +00000004
+0007de: 0e00                                   |0003: return-void
+0007e0: 0002 0400 1111 0000 2222 0000 3333 ... |0004: sparse-switch-data (18 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #16              : (in LA;)
+      name          : 'static_fields'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000804:                                        |[000804] A.static_fields:()V
+000814: 6000 0900                              |0000: sget v0, LA;.sI:I // field@0009
+000818: 6100 0a00                              |0002: sget-wide v0, LA;.sJ:J // field@000a
+00081c: 6200 0b00                              |0004: sget-object v0, LA;.sO:LA; // field@000b
+000820: 6300 0d00                              |0006: sget-boolean v0, LA;.sZ:Z // field@000d
+000824: 6400 0700                              |0008: sget-byte v0, LA;.sB:B // field@0007
+000828: 6500 0800                              |000a: sget-char v0, LA;.sC:C // field@0008
+00082c: 6600 0c00                              |000c: sget-short v0, LA;.sS:S // field@000c
+000830: 6700 0900                              |000e: sput v0, LA;.sI:I // field@0009
+000834: 6800 0a00                              |0010: sput-wide v0, LA;.sJ:J // field@000a
+000838: 6900 0b00                              |0012: sput-object v0, LA;.sO:LA; // field@000b
+00083c: 6a00 0d00                              |0014: sput-boolean v0, LA;.sZ:Z // field@000d
+000840: 6b00 0700                              |0016: sput-byte v0, LA;.sB:B // field@0007
+000844: 6c00 0800                              |0018: sput-char v0, LA;.sC:C // field@0008
+000848: 6d00 0500                              |001a: sput-short v0, LA;.mS:S // field@0005
+00084c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #17              : (in LA;)
+      name          : 'unary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+000850:                                        |[000850] A.unary_ops:()V
+000860: 7b10                                   |0000: neg-int v0, v1
+000862: 7c10                                   |0001: not-int v0, v1
+000864: 7d10                                   |0002: neg-long v0, v1
+000866: 7e10                                   |0003: not-long v0, v1
+000868: 7f10                                   |0004: neg-float v0, v1
+00086a: 8010                                   |0005: neg-double v0, v1
+00086c: 8110                                   |0006: int-to-long v0, v1
+00086e: 8210                                   |0007: int-to-float v0, v1
+000870: 8310                                   |0008: int-to-double v0, v1
+000872: 8410                                   |0009: long-to-int v0, v1
+000874: 8510                                   |000a: long-to-float v0, v1
+000876: 8610                                   |000b: long-to-double v0, v1
+000878: 8710                                   |000c: float-to-int v0, v1
+00087a: 8810                                   |000d: float-to-long v0, v1
+00087c: 8910                                   |000e: float-to-double v0, v1
+00087e: 8a10                                   |000f: double-to-int v0, v1
+000880: 8b10                                   |0010: double-to-long v0, v1
+000882: 8c10                                   |0011: double-to-float v0, v1
+000884: 8d10                                   |0012: int-to-byte v0, v1
+000886: 8e10                                   |0013: int-to-char v0, v1
+000888: 8f10                                   |0014: int-to-short v0, v1
+00088a: 0e00                                   |0015: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  Virtual methods   -
+    #0              : (in LA;)
+      name          : 'instance_fields'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 2
+      ins           : 1
+      outs          : 0
+      insns size    : 29 16-bit code units
+00088c:                                        |[00088c] A.instance_fields:()V
+00089c: 5210 0900                              |0000: iget v0, v1, LA;.sI:I // field@0009
+0008a0: 5310 0a00                              |0002: iget-wide v0, v1, LA;.sJ:J // field@000a
+0008a4: 5410 0b00                              |0004: iget-object v0, v1, LA;.sO:LA; // field@000b
+0008a8: 5510 0d00                              |0006: iget-boolean v0, v1, LA;.sZ:Z // field@000d
+0008ac: 5610 0700                              |0008: iget-byte v0, v1, LA;.sB:B // field@0007
+0008b0: 5710 0800                              |000a: iget-char v0, v1, LA;.sC:C // field@0008
+0008b4: 5810 0c00                              |000c: iget-short v0, v1, LA;.sS:S // field@000c
+0008b8: 5910 0900                              |000e: iput v0, v1, LA;.sI:I // field@0009
+0008bc: 5a10 0a00                              |0010: iput-wide v0, v1, LA;.sJ:J // field@000a
+0008c0: 5b10 0b00                              |0012: iput-object v0, v1, LA;.sO:LA; // field@000b
+0008c4: 5c10 0d00                              |0014: iput-boolean v0, v1, LA;.sZ:Z // field@000d
+0008c8: 5d10 0700                              |0016: iput-byte v0, v1, LA;.sB:B // field@0007
+0008cc: 5e10 0800                              |0018: iput-char v0, v1, LA;.sC:C // field@0008
+0008d0: 5f10 0c00                              |001a: iput-short v0, v1, LA;.sS:S // field@000c
+0008d4: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'invokes'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 5
+      ins           : 1
+      outs          : 1
+      insns size    : 15 16-bit code units
+0008d8:                                        |[0008d8] A.invokes:()V
+0008e8: 6e54 0a00 1032                         |0000: invoke-virtual {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008ee: 6f54 0a00 1032                         |0003: invoke-super {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008f4: 7054 0a00 1032                         |0006: invoke-direct {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008fa: 7154 0a00 1032                         |0009: invoke-static {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+000900: 7254 0a00 1032                         |000c: invoke-interface {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  source_file_idx   : -1 (unknown)
+
diff --git a/test/dexdump/all.xml b/test/dexdump/all.xml
new file mode 100644
index 0000000..b623ecb
--- /dev/null
+++ b/test/dexdump/all.xml
@@ -0,0 +1,211 @@
+<api>
+<package name=""
+>
+<class name="A"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="A"
+ type="A"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="arrays"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_2addr"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit16"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit8"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="compares"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="conditionals"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="constants"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="misc"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="moves"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="packed_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return32"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return64"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return_object"
+ return="java.lang.Object"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="sparse_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="static_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="unary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="instance_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="invokes"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+</class>
+</package>
+</api>
diff --git a/test/dexdump/bytecodes.txt b/test/dexdump/bytecodes.txt
old mode 100755
new mode 100644
index 4c8b79b..e1a381e
--- a/test/dexdump/bytecodes.txt
+++ b/test/dexdump/bytecodes.txt
@@ -12,8 +12,8 @@
 string_ids_off      : 112 (0x000070)
 type_ids_size       : 42
 type_ids_off        : 724 (0x0002d4)
-proto_ids_size       : 12
-proto_ids_off        : 892 (0x00037c)
+proto_ids_size      : 12
+proto_ids_off       : 892 (0x00037c)
 field_ids_size      : 40
 field_ids_off       : 1036 (0x00040c)
 method_ids_size     : 28
@@ -36,6 +36,11 @@
 direct_methods_size : 0
 virtual_methods_size: 1
 
+Class #0 annotations:
+Annotations on class
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Retention; value=CLASS
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Target; value={ TYPE FIELD METHOD PARAMETER CONSTRUCTOR LOCAL_VARIABLE }
+
 Class #0            -
   Class descriptor  : 'Landroid/annotation/SuppressLint;'
   Access flags      : 0x2601 (PUBLIC INTERFACE ABSTRACT ANNOTATION)
@@ -67,6 +72,11 @@
 direct_methods_size : 0
 virtual_methods_size: 1
 
+Class #1 annotations:
+Annotations on class
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Retention; value=CLASS
+  VISIBILITY_RUNTIME Ljava/lang/annotation/Target; value={ TYPE METHOD CONSTRUCTOR }
+
 Class #1            -
   Class descriptor  : 'Landroid/annotation/TargetApi;'
   Access flags      : 0x2601 (PUBLIC INTERFACE ABSTRACT ANNOTATION)
@@ -144,6 +154,11 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #3 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/EnclosingClass; value=Lcom/google/android/test/R;
+  VISIBILITY_SYSTEM Ldalvik/annotation/InnerClass; accessFlags=25 name="attr"
+
 Class #3            -
   Class descriptor  : 'Lcom/google/android/test/R$attr;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -186,6 +201,11 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #4 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/EnclosingClass; value=Lcom/google/android/test/R;
+  VISIBILITY_SYSTEM Ldalvik/annotation/InnerClass; accessFlags=25 name="drawable"
+
 Class #4            -
   Class descriptor  : 'Lcom/google/android/test/R$drawable;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -233,6 +253,10 @@
 direct_methods_size : 1
 virtual_methods_size: 0
 
+Class #5 annotations:
+Annotations on class
+  VISIBILITY_SYSTEM Ldalvik/annotation/MemberClasses; value={ Lcom/google/android/test/R$attr; Lcom/google/android/test/R$drawable; }
+
 Class #5            -
   Class descriptor  : 'Lcom/google/android/test/R;'
   Access flags      : 0x0011 (PUBLIC FINAL)
@@ -275,6 +299,10 @@
 direct_methods_size : 13
 virtual_methods_size: 2
 
+Class #6 annotations:
+Annotations on method #13 'doit'
+  VISIBILITY_SYSTEM Ldalvik/annotation/Throws; value={ Ljava/lang/Exception; }
+
 Class #6            -
   Class descriptor  : 'Lcom/google/android/test/Test;'
   Access flags      : 0x0001 (PUBLIC)
@@ -418,17 +446,17 @@
 000a02: 6a00 1800                              |0001: sput-boolean v0, Lcom/google/android/test/Test;.sBool:Z // field@0018
 000a06: 1300 1f00                              |0003: const/16 v0, #int 31 // #1f
 000a0a: 6b00 1700                              |0005: sput-byte v0, Lcom/google/android/test/Test;.sB:B // field@0017
-000a0e: 1400 ffff 0000                         |0007: const v0, #float 0.000000 // #0000ffff
+000a0e: 1400 ffff 0000                         |0007: const v0, #float 9.18341e-41 // #0000ffff
 000a14: 6c00 1900                              |000a: sput-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
 000a18: 1300 3412                              |000c: const/16 v0, #int 4660 // #1234
 000a1c: 6d00 1f00                              |000e: sput-short v0, Lcom/google/android/test/Test;.sS:S // field@001f
-000a20: 1400 7856 3412                         |0010: const v0, #float 0.000000 // #12345678
+000a20: 1400 7856 3412                         |0010: const v0, #float 5.69046e-28 // #12345678
 000a26: 6700 1c00                              |0013: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
-000a2a: 1800 ffff cdab 7956 3412               |0015: const-wide v0, #double 0.000000 // #12345679abcdffff
+000a2a: 1800 ffff cdab 7956 3412               |0015: const-wide v0, #double 5.62635e-221 // #12345679abcdffff
 000a34: 6800 1d00                              |001a: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
-000a38: 1400 00e4 4046                         |001c: const v0, #float 12345.000000 // #4640e400
+000a38: 1400 00e4 4046                         |001c: const v0, #float 12345 // #4640e400
 000a3e: 6700 1b00                              |001f: sput v0, Lcom/google/android/test/Test;.sF:F // field@001b
-000a42: 1800 0000 0000 801c c840               |0021: const-wide v0, #double 12345.000000 // #40c81c8000000000
+000a42: 1800 0000 0000 801c c840               |0021: const-wide v0, #double 12345 // #40c81c8000000000
 000a4c: 6800 1a00                              |0026: sput-wide v0, Lcom/google/android/test/Test;.sD:D // field@001a
 000a50: 1200                                   |0028: const/4 v0, #int 0 // #0
 000a52: 6900 1e00                              |0029: sput-object v0, Lcom/google/android/test/Test;.sO:Ljava/lang/Object; // field@001e
@@ -471,17 +499,17 @@
 000ab4: 5c81 0d00                              |0008: iput-boolean v1, v8, Lcom/google/android/test/Test;.mBool:Z // field@000d
 000ab8: 1301 1f00                              |000a: const/16 v1, #int 31 // #1f
 000abc: 5d81 0c00                              |000c: iput-byte v1, v8, Lcom/google/android/test/Test;.mB:B // field@000c
-000ac0: 1401 ffff 0000                         |000e: const v1, #float 0.000000 // #0000ffff
+000ac0: 1401 ffff 0000                         |000e: const v1, #float 9.18341e-41 // #0000ffff
 000ac6: 5e81 0e00                              |0011: iput-char v1, v8, Lcom/google/android/test/Test;.mC:C // field@000e
 000aca: 1301 3412                              |0013: const/16 v1, #int 4660 // #1234
 000ace: 5f81 1500                              |0015: iput-short v1, v8, Lcom/google/android/test/Test;.mS:S // field@0015
-000ad2: 1401 7856 3412                         |0017: const v1, #float 0.000000 // #12345678
+000ad2: 1401 7856 3412                         |0017: const v1, #float 5.69046e-28 // #12345678
 000ad8: 5981 1100                              |001a: iput v1, v8, Lcom/google/android/test/Test;.mI:I // field@0011
-000adc: 1802 ffff cdab 7956 3412               |001c: const-wide v2, #double 0.000000 // #12345679abcdffff
+000adc: 1802 ffff cdab 7956 3412               |001c: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000ae6: 5a82 1200                              |0021: iput-wide v2, v8, Lcom/google/android/test/Test;.mL:J // field@0012
-000aea: 1401 00e4 4046                         |0023: const v1, #float 12345.000000 // #4640e400
+000aea: 1401 00e4 4046                         |0023: const v1, #float 12345 // #4640e400
 000af0: 5981 1000                              |0026: iput v1, v8, Lcom/google/android/test/Test;.mF:F // field@0010
-000af4: 1802 0000 0000 801c c840               |0028: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000af4: 1802 0000 0000 801c c840               |0028: const-wide v2, #double 12345 // #40c81c8000000000
 000afe: 5a82 0f00                              |002d: iput-wide v2, v8, Lcom/google/android/test/Test;.mD:D // field@000f
 000b02: 1201                                   |002f: const/4 v1, #int 0 // #0
 000b04: 5b81 1300                              |0030: iput-object v1, v8, Lcom/google/android/test/Test;.mO:Ljava/lang/Object; // field@0013
@@ -626,7 +654,7 @@
 000cc6: 8d00                                   |0011: int-to-byte v0, v0
 000cc8: 5db0 0c00                              |0012: iput-byte v0, v11, Lcom/google/android/test/Test;.mB:B // field@000c
 000ccc: 57b0 0e00                              |0014: iget-char v0, v11, Lcom/google/android/test/Test;.mC:C // field@000e
-000cd0: 1401 ffff 0000                         |0016: const v1, #float 0.000000 // #0000ffff
+000cd0: 1401 ffff 0000                         |0016: const v1, #float 9.18341e-41 // #0000ffff
 000cd6: b010                                   |0019: add-int/2addr v0, v1
 000cd8: 8e00                                   |001a: int-to-char v0, v0
 000cda: 5eb0 0e00                              |001b: iput-char v0, v11, Lcom/google/android/test/Test;.mC:C // field@000e
@@ -635,7 +663,7 @@
 000ce6: 8f00                                   |0021: int-to-short v0, v0
 000ce8: 5fb0 1500                              |0022: iput-short v0, v11, Lcom/google/android/test/Test;.mS:S // field@0015
 000cec: 52b0 1100                              |0024: iget v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
-000cf0: 1401 7856 3412                         |0026: const v1, #float 0.000000 // #12345678
+000cf0: 1401 7856 3412                         |0026: const v1, #float 5.69046e-28 // #12345678
 000cf6: b010                                   |0029: add-int/2addr v0, v1
 000cf8: 59b0 1100                              |002a: iput v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
 000cfc: 52b0 1100                              |002c: iget v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
@@ -643,7 +671,7 @@
 000d04: b010                                   |0030: add-int/2addr v0, v1
 000d06: 59b0 1100                              |0031: iput v0, v11, Lcom/google/android/test/Test;.mI:I // field@0011
 000d0a: 53b0 1200                              |0033: iget-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
-000d0e: 1802 ffff cdab 7956 3412               |0035: const-wide v2, #double 0.000000 // #12345679abcdffff
+000d0e: 1802 ffff cdab 7956 3412               |0035: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000d18: bb20                                   |003a: add-long/2addr v0, v2
 000d1a: 5ab0 1200                              |003b: iput-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
 000d1e: 53b0 1200                              |003d: iget-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
@@ -651,7 +679,7 @@
 000d26: bb20                                   |0041: add-long/2addr v0, v2
 000d28: 5ab0 1200                              |0042: iput-wide v0, v11, Lcom/google/android/test/Test;.mL:J // field@0012
 000d2c: 52b0 1000                              |0044: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
-000d30: 1401 00e4 4046                         |0046: const v1, #float 12345.000000 // #4640e400
+000d30: 1401 00e4 4046                         |0046: const v1, #float 12345 // #4640e400
 000d36: 52b2 1000                              |0049: iget v2, v11, Lcom/google/android/test/Test;.mF:F // field@0010
 000d3a: 1503 803f                              |004b: const/high16 v3, #int 1065353216 // #3f80
 000d3e: c732                                   |004d: sub-float/2addr v2, v3
@@ -664,7 +692,7 @@
 000d50: c610                                   |0056: add-float/2addr v0, v1
 000d52: 59b0 1000                              |0057: iput v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
 000d56: 53b0 0f00                              |0059: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
-000d5a: 1802 0000 0000 801c c840               |005b: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000d5a: 1802 0000 0000 801c c840               |005b: const-wide v2, #double 12345 // #40c81c8000000000
 000d64: 53b4 0f00                              |0060: iget-wide v4, v11, Lcom/google/android/test/Test;.mD:D // field@000f
 000d68: 1906 f03f                              |0062: const-wide/high16 v6, #long 4607182418800017408 // #3ff0
 000d6c: cc64                                   |0064: sub-double/2addr v4, v6
@@ -681,7 +709,7 @@
 000d8a: 2d00 0001                              |0073: cmpl-float v0, v0, v1
 000d8e: 3800 2900                              |0075: if-eqz v0, 009e // +0029
 000d92: 52b0 1000                              |0077: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
-000d96: 1401 9a99 993e                         |0079: const v1, #float 0.300000 // #3e99999a
+000d96: 1401 9a99 993e                         |0079: const v1, #float 0.3 // #3e99999a
 000d9c: 2d00 0001                              |007c: cmpl-float v0, v0, v1
 000da0: 3900 2000                              |007e: if-nez v0, 009e // +0020
 000da4: 52b0 1000                              |0080: iget v0, v11, Lcom/google/android/test/Test;.mF:F // field@0010
@@ -707,7 +735,7 @@
 000df2: 2f00 0002                              |00a7: cmpl-double v0, v0, v2
 000df6: 3800 2b00                              |00a9: if-eqz v0, 00d4 // +002b
 000dfa: 53b0 0f00                              |00ab: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
-000dfe: 1802 3333 3333 3333 d33f               |00ad: const-wide v2, #double 0.300000 // #3fd3333333333333
+000dfe: 1802 3333 3333 3333 d33f               |00ad: const-wide v2, #double 0.3 // #3fd3333333333333
 000e08: 2f00 0002                              |00b2: cmpl-double v0, v0, v2
 000e0c: 3900 2000                              |00b4: if-nez v0, 00d4 // +0020
 000e10: 53b0 0f00                              |00b6: iget-wide v0, v11, Lcom/google/android/test/Test;.mD:D // field@000f
@@ -790,7 +818,7 @@
 000eb8: 8d00                                   |000c: int-to-byte v0, v0
 000eba: 6b00 1700                              |000d: sput-byte v0, Lcom/google/android/test/Test;.sB:B // field@0017
 000ebe: 6500 1900                              |000f: sget-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
-000ec2: 1401 ffff 0000                         |0011: const v1, #float 0.000000 // #0000ffff
+000ec2: 1401 ffff 0000                         |0011: const v1, #float 9.18341e-41 // #0000ffff
 000ec8: b010                                   |0014: add-int/2addr v0, v1
 000eca: 8e00                                   |0015: int-to-char v0, v0
 000ecc: 6c00 1900                              |0016: sput-char v0, Lcom/google/android/test/Test;.sC:C // field@0019
@@ -799,7 +827,7 @@
 000ed8: 8f00                                   |001c: int-to-short v0, v0
 000eda: 6d00 1f00                              |001d: sput-short v0, Lcom/google/android/test/Test;.sS:S // field@001f
 000ede: 6000 1c00                              |001f: sget v0, Lcom/google/android/test/Test;.sI:I // field@001c
-000ee2: 1401 7856 3412                         |0021: const v1, #float 0.000000 // #12345678
+000ee2: 1401 7856 3412                         |0021: const v1, #float 5.69046e-28 // #12345678
 000ee8: b010                                   |0024: add-int/2addr v0, v1
 000eea: 6700 1c00                              |0025: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
 000eee: 6000 1c00                              |0027: sget v0, Lcom/google/android/test/Test;.sI:I // field@001c
@@ -807,7 +835,7 @@
 000ef6: b010                                   |002b: add-int/2addr v0, v1
 000ef8: 6700 1c00                              |002c: sput v0, Lcom/google/android/test/Test;.sI:I // field@001c
 000efc: 6100 1d00                              |002e: sget-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
-000f00: 1802 ffff cdab 7956 3412               |0030: const-wide v2, #double 0.000000 // #12345679abcdffff
+000f00: 1802 ffff cdab 7956 3412               |0030: const-wide v2, #double 5.62635e-221 // #12345679abcdffff
 000f0a: bb20                                   |0035: add-long/2addr v0, v2
 000f0c: 6800 1d00                              |0036: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
 000f10: 6100 1d00                              |0038: sget-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
@@ -815,7 +843,7 @@
 000f18: bb20                                   |003c: add-long/2addr v0, v2
 000f1a: 6800 1d00                              |003d: sput-wide v0, Lcom/google/android/test/Test;.sL:J // field@001d
 000f1e: 6000 1b00                              |003f: sget v0, Lcom/google/android/test/Test;.sF:F // field@001b
-000f22: 1401 00e4 4046                         |0041: const v1, #float 12345.000000 // #4640e400
+000f22: 1401 00e4 4046                         |0041: const v1, #float 12345 // #4640e400
 000f28: 6002 1b00                              |0044: sget v2, Lcom/google/android/test/Test;.sF:F // field@001b
 000f2c: 7f22                                   |0046: neg-float v2, v2
 000f2e: 1503 803f                              |0047: const/high16 v3, #int 1065353216 // #3f80
@@ -830,7 +858,7 @@
 000f48: c610                                   |0054: add-float/2addr v0, v1
 000f4a: 6700 1b00                              |0055: sput v0, Lcom/google/android/test/Test;.sF:F // field@001b
 000f4e: 6100 1a00                              |0057: sget-wide v0, Lcom/google/android/test/Test;.sD:D // field@001a
-000f52: 1802 0000 0000 801c c840               |0059: const-wide v2, #double 12345.000000 // #40c81c8000000000
+000f52: 1802 0000 0000 801c c840               |0059: const-wide v2, #double 12345 // #40c81c8000000000
 000f5c: 6104 1a00                              |005e: sget-wide v4, Lcom/google/android/test/Test;.sD:D // field@001a
 000f60: 8044                                   |0060: neg-double v4, v4
 000f62: 1906 f03f                              |0061: const-wide/high16 v6, #long 4607182418800017408 // #3ff0
diff --git a/test/dexdump/checkers.txt b/test/dexdump/checkers.txt
old mode 100755
new mode 100644
index 5c8336f..aee6e64
--- a/test/dexdump/checkers.txt
+++ b/test/dexdump/checkers.txt
@@ -12,8 +12,8 @@
 string_ids_off      : 112 (0x000070)
 type_ids_size       : 58
 type_ids_off        : 1404 (0x00057c)
-proto_ids_size       : 88
-proto_ids_off        : 1636 (0x000664)
+proto_ids_size      : 88
+proto_ids_off       : 1636 (0x000664)
 field_ids_size      : 108
 field_ids_off       : 2692 (0x000a84)
 method_ids_size     : 177
@@ -836,7 +836,7 @@
 001c8a: 1300 5829                              |0147: const/16 v0, #int 10584 // #2958
 001c8e: 2300 3600                              |0149: new-array v0, v0, [B // type@0036
 001c92: 6900 6800                              |014b: sput-object v0, Lcom/google/android/checkers/g;.p:[B // field@0068
-001c96: 1400 00c1 0300                         |014d: const v0, #float 0.000000 // #0003c100
+001c96: 1400 00c1 0300                         |014d: const v0, #float 3.44742e-40 // #0003c100
 001c9c: 2300 3600                              |0150: new-array v0, v0, [B // type@0036
 001ca0: 6900 6900                              |0152: sput-object v0, Lcom/google/android/checkers/g;.q:[B // field@0069
 001ca4: 6e10 1100 0a00                         |0154: invoke-virtual {v10}, Landroid/content/Context;.getResources:()Landroid/content/res/Resources; // method@0011
@@ -2044,7 +2044,7 @@
 002bd0: 5433 3b00                              |004c: iget-object v3, v3, Lcom/google/android/checkers/a;.b:[I // field@003b
 002bd4: 4403 0309                              |004e: aget v3, v3, v9
 002bd8: 5983 2a00                              |0050: iput v3, v8, Lcom/google/android/checkers/CheckersView;.x:I // field@002a
-002bdc: 1403 6666 663f                         |0052: const v3, #float 0.900000 // #3f666666
+002bdc: 1403 6666 663f                         |0052: const v3, #float 0.9 // #3f666666
 002be2: 5983 1e00                              |0055: iput v3, v8, Lcom/google/android/checkers/CheckersView;.l:F // field@001e
 002be6: 3800 4500                              |0057: if-eqz v0, 009c // +0045
 002bea: 5483 2200                              |0059: iget-object v3, v8, Lcom/google/android/checkers/CheckersView;.p:Lcom/google/android/checkers/a; // field@0022
@@ -2943,7 +2943,7 @@
 0036f2: 0800 1c00                              |0197: move-object/from16 v0, v28
 0036f6: 5202 1e00                              |0199: iget v2, v0, Lcom/google/android/checkers/CheckersView;.l:F // field@001e
 0036fa: 8922                                   |019b: float-to-double v2, v2
-0036fc: 1804 9a99 9999 9999 a93f               |019c: const-wide v4, #double 0.050000 // #3fa999999999999a
+0036fc: 1804 9a99 9999 9999 a93f               |019c: const-wide v4, #double 0.05 // #3fa999999999999a
 003706: cc42                                   |01a1: sub-double/2addr v2, v4
 003708: 8c22                                   |01a2: double-to-float v2, v2
 00370a: 0800 1c00                              |01a3: move-object/from16 v0, v28
@@ -3568,7 +3568,7 @@
 003f38: 28e9                                   |001e: goto 0007 // -0017
 003f3a: 1300 3075                              |001f: const/16 v0, #int 30000 // #7530
 003f3e: 28e6                                   |0021: goto 0007 // -001a
-003f40: 1400 60ea 0000                         |0022: const v0, #float 0.000000 // #0000ea60
+003f40: 1400 60ea 0000                         |0022: const v0, #float 8.40779e-41 // #0000ea60
 003f46: 28e2                                   |0025: goto 0007 // -001e
 003f48: 0d00                                   |0026: move-exception v0
 003f4a: 1e02                                   |0027: monitor-exit v2
@@ -3811,7 +3811,7 @@
 004024: 1302 0040                              |0046: const/16 v2, #int 16384 // #4000
 004028: 4b02 0001                              |0048: aput v2, v0, v1
 00402c: 1301 1300                              |004a: const/16 v1, #int 19 // #13
-004030: 1402 0080 0000                         |004c: const v2, #float 0.000000 // #00008000
+004030: 1402 0080 0000                         |004c: const v2, #float 4.59177e-41 // #00008000
 004036: 4b02 0001                              |004f: aput v2, v0, v1
 00403a: 1501 0100                              |0051: const/high16 v1, #int 65536 // #1
 00403e: 4b01 0006                              |0053: aput v1, v0, v6
@@ -3931,7 +3931,7 @@
 0041f6: 1302 0040                              |012f: const/16 v2, #int 16384 // #4000
 0041fa: 4b02 0001                              |0131: aput v2, v0, v1
 0041fe: 1301 1200                              |0133: const/16 v1, #int 18 // #12
-004202: 1402 0080 0000                         |0135: const v2, #float 0.000000 // #00008000
+004202: 1402 0080 0000                         |0135: const v2, #float 4.59177e-41 // #00008000
 004208: 4b02 0001                              |0138: aput v2, v0, v1
 00420c: 1301 1400                              |013a: const/16 v1, #int 20 // #14
 004210: 1502 0100                              |013c: const/high16 v2, #int 65536 // #1
@@ -3996,7 +3996,7 @@
 0042fa: 1301 0040                              |01b1: const/16 v1, #int 16384 // #4000
 0042fe: 4b01 0006                              |01b3: aput v1, v0, v6
 004302: 1301 1600                              |01b5: const/16 v1, #int 22 // #16
-004306: 1402 0080 0000                         |01b7: const v2, #float 0.000000 // #00008000
+004306: 1402 0080 0000                         |01b7: const v2, #float 4.59177e-41 // #00008000
 00430c: 4b02 0001                              |01ba: aput v2, v0, v1
 004310: 1301 1800                              |01bc: const/16 v1, #int 24 // #18
 004314: 1502 0200                              |01be: const/high16 v2, #int 131072 // #2
@@ -4045,7 +4045,7 @@
 0043b4: 1301 0040                              |020e: const/16 v1, #int 16384 // #4000
 0043b8: 4b01 0004                              |0210: aput v1, v0, v4
 0043bc: 1301 0b00                              |0212: const/16 v1, #int 11 // #b
-0043c0: 1402 0080 0000                         |0214: const v2, #float 0.000000 // #00008000
+0043c0: 1402 0080 0000                         |0214: const v2, #float 4.59177e-41 // #00008000
 0043c6: 4b02 0001                              |0217: aput v2, v0, v1
 0043ca: 1301 0d00                              |0219: const/16 v1, #int 13 // #d
 0043ce: 1502 0100                              |021b: const/high16 v2, #int 65536 // #1
@@ -4167,7 +4167,7 @@
 004588: 1301 0900                              |02f8: const/16 v1, #int 9 // #9
 00458c: 1302 0040                              |02fa: const/16 v2, #int 16384 // #4000
 004590: 4b02 0001                              |02fc: aput v2, v0, v1
-004594: 1401 0080 0000                         |02fe: const v1, #float 0.000000 // #00008000
+004594: 1401 0080 0000                         |02fe: const v1, #float 4.59177e-41 // #00008000
 00459a: 4b01 0004                              |0301: aput v1, v0, v4
 00459e: 1301 0c00                              |0303: const/16 v1, #int 12 // #c
 0045a2: 1502 0100                              |0305: const/high16 v2, #int 65536 // #1
@@ -4226,7 +4226,7 @@
 00466e: 1302 0040                              |036b: const/16 v2, #int 16384 // #4000
 004672: 4b02 0001                              |036d: aput v2, v0, v1
 004676: 1261                                   |036f: const/4 v1, #int 6 // #6
-004678: 1402 0080 0000                         |0370: const v2, #float 0.000000 // #00008000
+004678: 1402 0080 0000                         |0370: const v2, #float 4.59177e-41 // #00008000
 00467e: 4b02 0001                              |0373: aput v2, v0, v1
 004682: 1301 0800                              |0375: const/16 v1, #int 8 // #8
 004686: 1502 0200                              |0377: const/high16 v2, #int 131072 // #2
@@ -4496,7 +4496,7 @@
 004c16: 3803 3400                              |0047: if-eqz v3, 007b // +0034
 004c1a: 0800 1800                              |0049: move-object/from16 v0, v24
 004c1e: 5203 5100                              |004b: iget v3, v0, Lcom/google/android/checkers/a;.x:I // field@0051
-004c22: 1404 ffff 0f00                         |004d: const v4, #float 0.000000 // #000fffff
+004c22: 1404 ffff 0f00                         |004d: const v4, #float 1.46937e-39 // #000fffff
 004c28: b534                                   |0050: and-int/2addr v4, v3
 004c2a: 0800 1800                              |0051: move-object/from16 v0, v24
 004c2e: 5405 5200                              |0053: iget-object v5, v0, Lcom/google/android/checkers/a;.y:[I // field@0052
@@ -4516,7 +4516,7 @@
 004c66: 5405 5300                              |006f: iget-object v5, v0, Lcom/google/android/checkers/a;.z:[S // field@0053
 004c6a: 4a04 0504                              |0071: aget-short v4, v5, v4
 004c6e: 2c03 8104 0000                         |0073: sparse-switch v3, 000004f4 // +00000481
-004c74: 1403 3f42 0f00                         |0076: const v3, #float 0.000000 // #000f423f
+004c74: 1403 3f42 0f00                         |0076: const v3, #float 1.4013e-39 // #000f423f
 004c7a: 3334 a1ff                              |0079: if-ne v4, v3, 001a // -005f
 004c7e: 0800 1800                              |007b: move-object/from16 v0, v24
 004c82: 0201 1b00                              |007d: move/from16 v1, v27
@@ -4897,7 +4897,7 @@
 0051da: 28c4                                   |0329: goto 02ed // -003c
 0051dc: 0200 1900                              |032a: move/from16 v0, v25
 0051e0: 3704 4afd                              |032c: if-le v4, v0, 0076 // -02b6
-0051e4: 1404 3f42 0f00                         |032e: const v4, #float 0.000000 // #000f423f
+0051e4: 1404 3f42 0f00                         |032e: const v4, #float 1.4013e-39 // #000f423f
 0051ea: 2900 45fd                              |0331: goto/16 0076 // -02bb
 0051ee: 0200 1a00                              |0333: move/from16 v0, v26
 0051f2: 3404 f9ff                              |0335: if-lt v4, v0, 032e // -0007
@@ -5020,7 +5020,7 @@
 0053a2: 3545 bd00                              |040d: if-ge v5, v4, 04ca // +00bd
 0053a6: 0800 1800                              |040f: move-object/from16 v0, v24
 0053aa: 5204 3e00                              |0411: iget v4, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-0053ae: 1405 1100 0088                         |0413: const v5, #float -0.000000 // #88000011
+0053ae: 1405 1100 0088                         |0413: const v5, #float -3.85187e-34 // #88000011
 0053b4: b554                                   |0416: and-int/2addr v4, v5
 0053b6: 3804 0900                              |0417: if-eqz v4, 0420 // +0009
 0053ba: 7110 9e00 0400                         |0419: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5052,11 +5052,11 @@
 005418: 1507 00a0                              |0448: const/high16 v7, #int -1610612736 // #a000
 00541c: 3376 0400                              |044a: if-ne v6, v7, 044e // +0004
 005420: d803 03f4                              |044c: add-int/lit8 v3, v3, #int -12 // #f4
-005424: 1406 0066 6600                         |044e: const v6, #float 0.000000 // #00666600
+005424: 1406 0066 6600                         |044e: const v6, #float 9.40381e-39 // #00666600
 00542a: b564                                   |0451: and-int/2addr v4, v6
 00542c: 7110 9e00 0400                         |0452: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005432: 0a04                                   |0455: move-result v4
-005434: 1406 0066 6600                         |0456: const v6, #float 0.000000 // #00666600
+005434: 1406 0066 6600                         |0456: const v6, #float 9.40381e-39 // #00666600
 00543a: b565                                   |0459: and-int/2addr v5, v6
 00543c: 7110 9e00 0500                         |045a: invoke-static {v5}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005442: 0a05                                   |045d: move-result v5
@@ -5064,13 +5064,13 @@
 005446: b043                                   |045f: add-int/2addr v3, v4
 005448: 0800 1800                              |0460: move-object/from16 v0, v24
 00544c: 5204 3d00                              |0462: iget v4, v0, Lcom/google/android/checkers/a;.d:I // field@003d
-005450: 1405 1818 1818                         |0464: const v5, #float 0.000000 // #18181818
+005450: 1405 1818 1818                         |0464: const v5, #float 1.96577e-24 // #18181818
 005456: b554                                   |0467: and-int/2addr v4, v5
 005458: 7110 9e00 0400                         |0468: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 00545e: 0a04                                   |046b: move-result v4
 005460: 0800 1800                              |046c: move-object/from16 v0, v24
 005464: 5205 3f00                              |046e: iget v5, v0, Lcom/google/android/checkers/a;.f:I // field@003f
-005468: 1406 1818 1818                         |0470: const v6, #float 0.000000 // #18181818
+005468: 1406 1818 1818                         |0470: const v6, #float 1.96577e-24 // #18181818
 00546e: b565                                   |0473: and-int/2addr v5, v6
 005470: 7110 9e00 0500                         |0474: invoke-static {v5}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
 005476: 0a05                                   |0477: move-result v5
@@ -5078,7 +5078,7 @@
 00547a: b143                                   |0479: sub-int/2addr v3, v4
 00547c: 0800 1800                              |047a: move-object/from16 v0, v24
 005480: 5204 3e00                              |047c: iget v4, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-005484: 1405 0800 0010                         |047e: const v5, #float 0.000000 // #10000008
+005484: 1405 0800 0010                         |047e: const v5, #float 2.52436e-29 // #10000008
 00548a: b554                                   |0481: and-int/2addr v4, v5
 00548c: 3804 0900                              |0482: if-eqz v4, 048b // +0009
 005490: 7110 9e00 0400                         |0484: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5087,7 +5087,7 @@
 00549c: b143                                   |048a: sub-int/2addr v3, v4
 00549e: 0800 1800                              |048b: move-object/from16 v0, v24
 0054a2: 5204 4000                              |048d: iget v4, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-0054a6: 1405 0800 0010                         |048f: const v5, #float 0.000000 // #10000008
+0054a6: 1405 0800 0010                         |048f: const v5, #float 2.52436e-29 // #10000008
 0054ac: b554                                   |0492: and-int/2addr v4, v5
 0054ae: 3804 4c00                              |0493: if-eqz v4, 04df // +004c
 0054b2: 7110 9e00 0400                         |0495: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5124,7 +5124,7 @@
 00551c: 3745 56ff                              |04ca: if-le v5, v4, 0420 // -00aa
 005520: 0800 1800                              |04cc: move-object/from16 v0, v24
 005524: 5204 4000                              |04ce: iget v4, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-005528: 1405 1100 0088                         |04d0: const v5, #float -0.000000 // #88000011
+005528: 1405 1100 0088                         |04d0: const v5, #float -3.85187e-34 // #88000011
 00552e: b554                                   |04d3: and-int/2addr v4, v5
 005530: 3804 4cff                              |04d4: if-eqz v4, 0420 // -00b4
 005534: 7110 9e00 0400                         |04d6: invoke-static {v4}, Ljava/lang/Integer;.bitCount:(I)I // method@009e
@@ -5407,7 +5407,7 @@
 005868: 0126                                   |0010: move v6, v2
 00586a: 0135                                   |0011: move v5, v3
 00586c: 5240 5100                              |0012: iget v0, v4, Lcom/google/android/checkers/a;.x:I // field@0051
-005870: 1401 ffff 0f00                         |0014: const v1, #float 0.000000 // #000fffff
+005870: 1401 ffff 0f00                         |0014: const v1, #float 1.46937e-39 // #000fffff
 005876: b501                                   |0017: and-int/2addr v1, v0
 005878: 5442 5200                              |0018: iget-object v2, v4, Lcom/google/android/checkers/a;.y:[I // field@0052
 00587c: 4b00 0201                              |001a: aput v0, v2, v1
@@ -5568,19 +5568,19 @@
 005a54: e203 1404                              |0016: ushr-int/lit8 v3, v20, #int 4 // #04
 005a58: b543                                   |0018: and-int/2addr v3, v4
 005a5a: 3803 1200                              |0019: if-eqz v3, 002b // +0012
-005a5e: 1401 e0e0 e0e0                         |001b: const v1, #float -129633581999069331456.000000 // #e0e0e0e0
+005a5e: 1401 e0e0 e0e0                         |001b: const v1, #float -1.29634e+20 // #e0e0e0e0
 005a64: b531                                   |001e: and-int/2addr v1, v3
 005a66: e201 0105                              |001f: ushr-int/lit8 v1, v1, #int 5 // #05
-005a6a: 1405 0007 0707                         |0021: const v5, #float 0.000000 // #07070700
+005a6a: 1405 0007 0707                         |0021: const v5, #float 1.01583e-34 // #07070700
 005a70: b553                                   |0024: and-int/2addr v3, v5
 005a72: e203 0303                              |0025: ushr-int/lit8 v3, v3, #int 3 // #03
 005a76: b631                                   |0027: or-int/2addr v1, v3
 005a78: b521                                   |0028: and-int/2addr v1, v2
 005a7a: de01 0100                              |0029: or-int/lit8 v1, v1, #int 0 // #00
-005a7e: 1403 e0e0 e0e0                         |002b: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+005a7e: 1403 e0e0 e0e0                         |002b: const v3, #float -1.29634e+20 // #e0e0e0e0
 005a84: 9503 0314                              |002e: and-int v3, v3, v20
 005a88: e203 0305                              |0030: ushr-int/lit8 v3, v3, #int 5 // #05
-005a8c: 1405 0007 0707                         |0032: const v5, #float 0.000000 // #07070700
+005a8c: 1405 0007 0707                         |0032: const v5, #float 1.01583e-34 // #07070700
 005a92: 9505 0514                              |0035: and-int v5, v5, v20
 005a96: e205 0503                              |0037: ushr-int/lit8 v5, v5, #int 3 // #03
 005a9a: b653                                   |0039: or-int/2addr v3, v5
@@ -5597,19 +5597,19 @@
 005abc: 3802 1500                              |004a: if-eqz v2, 005f // +0015
 005ac0: 0800 1100                              |004c: move-object/from16 v0, v17
 005ac4: 5203 3e00                              |004e: iget v3, v0, Lcom/google/android/checkers/a;.e:I // field@003e
-005ac8: 1405 0707 0707                         |0050: const v5, #float 0.000000 // #07070707
+005ac8: 1405 0707 0707                         |0050: const v5, #float 1.01583e-34 // #07070707
 005ace: b525                                   |0053: and-int/2addr v5, v2
 005ad0: e005 0505                              |0054: shl-int/lit8 v5, v5, #int 5 // #05
-005ad4: 1406 e0e0 e000                         |0056: const v6, #float 0.000000 // #00e0e0e0
+005ad4: 1406 e0e0 e000                         |0056: const v6, #float 2.06518e-38 // #00e0e0e0
 005ada: b562                                   |0059: and-int/2addr v2, v6
 005adc: e002 0203                              |005a: shl-int/lit8 v2, v2, #int 3 // #03
 005ae0: b652                                   |005c: or-int/2addr v2, v5
 005ae2: b532                                   |005d: and-int/2addr v2, v3
 005ae4: b621                                   |005e: or-int/2addr v1, v2
-005ae6: 1402 0707 0707                         |005f: const v2, #float 0.000000 // #07070707
+005ae6: 1402 0707 0707                         |005f: const v2, #float 1.01583e-34 // #07070707
 005aec: 9502 0214                              |0062: and-int v2, v2, v20
 005af0: e002 0205                              |0064: shl-int/lit8 v2, v2, #int 5 // #05
-005af4: 1403 e0e0 e000                         |0066: const v3, #float 0.000000 // #00e0e0e0
+005af4: 1403 e0e0 e000                         |0066: const v3, #float 2.06518e-38 // #00e0e0e0
 005afa: 9503 0314                              |0069: and-int v3, v3, v20
 005afe: e003 0303                              |006b: shl-int/lit8 v3, v3, #int 3 // #03
 005b02: b632                                   |006d: or-int/2addr v2, v3
@@ -5802,19 +5802,19 @@
 005dd4: 3803 1600                              |01d6: if-eqz v3, 01ec // +0016
 005dd8: 0800 1100                              |01d8: move-object/from16 v0, v17
 005ddc: 5201 4000                              |01da: iget v1, v0, Lcom/google/android/checkers/a;.g:I // field@0040
-005de0: 1405 e0e0 e0e0                         |01dc: const v5, #float -129633581999069331456.000000 // #e0e0e0e0
+005de0: 1405 e0e0 e0e0                         |01dc: const v5, #float -1.29634e+20 // #e0e0e0e0
 005de6: b535                                   |01df: and-int/2addr v5, v3
 005de8: e205 0505                              |01e0: ushr-int/lit8 v5, v5, #int 5 // #05
-005dec: 1406 0007 0707                         |01e2: const v6, #float 0.000000 // #07070700
+005dec: 1406 0007 0707                         |01e2: const v6, #float 1.01583e-34 // #07070700
 005df2: b563                                   |01e5: and-int/2addr v3, v6
 005df4: e203 0303                              |01e6: ushr-int/lit8 v3, v3, #int 3 // #03
 005df8: b653                                   |01e8: or-int/2addr v3, v5
 005dfa: b531                                   |01e9: and-int/2addr v1, v3
 005dfc: de01 0100                              |01ea: or-int/lit8 v1, v1, #int 0 // #00
-005e00: 1403 e0e0 e0e0                         |01ec: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+005e00: 1403 e0e0 e0e0                         |01ec: const v3, #float -1.29634e+20 // #e0e0e0e0
 005e06: 9503 0314                              |01ef: and-int v3, v3, v20
 005e0a: e203 0305                              |01f1: ushr-int/lit8 v3, v3, #int 5 // #05
-005e0e: 1405 0007 0707                         |01f3: const v5, #float 0.000000 // #07070700
+005e0e: 1405 0007 0707                         |01f3: const v5, #float 1.01583e-34 // #07070700
 005e14: 9505 0514                              |01f6: and-int v5, v5, v20
 005e18: e205 0503                              |01f8: ushr-int/lit8 v5, v5, #int 3 // #03
 005e1c: b653                                   |01fa: or-int/2addr v3, v5
@@ -5828,19 +5828,19 @@
 005e34: e003 1404                              |0206: shl-int/lit8 v3, v20, #int 4 // #04
 005e38: b543                                   |0208: and-int/2addr v3, v4
 005e3a: 3803 1100                              |0209: if-eqz v3, 021a // +0011
-005e3e: 1405 0707 0707                         |020b: const v5, #float 0.000000 // #07070707
+005e3e: 1405 0707 0707                         |020b: const v5, #float 1.01583e-34 // #07070707
 005e44: b535                                   |020e: and-int/2addr v5, v3
 005e46: e005 0505                              |020f: shl-int/lit8 v5, v5, #int 5 // #05
-005e4a: 1406 e0e0 e000                         |0211: const v6, #float 0.000000 // #00e0e0e0
+005e4a: 1406 e0e0 e000                         |0211: const v6, #float 2.06518e-38 // #00e0e0e0
 005e50: b563                                   |0214: and-int/2addr v3, v6
 005e52: e003 0303                              |0215: shl-int/lit8 v3, v3, #int 3 // #03
 005e56: b653                                   |0217: or-int/2addr v3, v5
 005e58: b523                                   |0218: and-int/2addr v3, v2
 005e5a: b631                                   |0219: or-int/2addr v1, v3
-005e5c: 1403 0707 0707                         |021a: const v3, #float 0.000000 // #07070707
+005e5c: 1403 0707 0707                         |021a: const v3, #float 1.01583e-34 // #07070707
 005e62: 9503 0314                              |021d: and-int v3, v3, v20
 005e66: e003 0305                              |021f: shl-int/lit8 v3, v3, #int 5 // #05
-005e6a: 1405 e0e0 e000                         |0221: const v5, #float 0.000000 // #00e0e0e0
+005e6a: 1405 e0e0 e000                         |0221: const v5, #float 2.06518e-38 // #00e0e0e0
 005e70: 9505 0514                              |0224: and-int v5, v5, v20
 005e74: e005 0503                              |0226: shl-int/lit8 v5, v5, #int 3 // #03
 005e78: b653                                   |0228: or-int/2addr v3, v5
@@ -6423,9 +6423,9 @@
       outs          : 6
       insns size    : 461 16-bit code units
 006604:                                        |[006604] com.google.android.checkers.a.b:(IZI)Z
-006614: 1404 e0e0 e000                         |0000: const v4, #float 0.000000 // #00e0e0e0
+006614: 1404 e0e0 e000                         |0000: const v4, #float 2.06518e-38 // #00e0e0e0
 00661a: 1216                                   |0003: const/4 v6, #int 1 // #1
-00661c: 1403 e0e0 e0e0                         |0004: const v3, #float -129633581999069331456.000000 // #e0e0e0e0
+00661c: 1403 e0e0 e0e0                         |0004: const v3, #float -1.29634e+20 // #e0e0e0e0
 006622: 130a 0008                              |0007: const/16 v10, #int 2048 // #800
 006626: 1309 0002                              |0009: const/16 v9, #int 512 // #200
 00662a: 380d e400                              |000b: if-eqz v13, 00ef // +00e4
@@ -6436,7 +6436,7 @@
 00663e: 9502 0e03                              |0015: and-int v2, v14, v3
 006642: e202 0205                              |0017: ushr-int/lit8 v2, v2, #int 5 // #05
 006646: b621                                   |0019: or-int/2addr v1, v2
-006648: 1402 0007 0707                         |001a: const v2, #float 0.000000 // #07070700
+006648: 1402 0007 0707                         |001a: const v2, #float 1.01583e-34 // #07070700
 00664e: b5e2                                   |001d: and-int/2addr v2, v14
 006650: e202 0203                              |001e: ushr-int/lit8 v2, v2, #int 3 // #03
 006654: b621                                   |0020: or-int/2addr v1, v2
@@ -6453,14 +6453,14 @@
 006676: 9502 0e03                              |0031: and-int v2, v14, v3
 00667a: e202 0205                              |0033: ushr-int/lit8 v2, v2, #int 5 // #05
 00667e: b621                                   |0035: or-int/2addr v1, v2
-006680: 1402 0007 0707                         |0036: const v2, #float 0.000000 // #07070700
+006680: 1402 0007 0707                         |0036: const v2, #float 1.01583e-34 // #07070700
 006686: b5e2                                   |0039: and-int/2addr v2, v14
 006688: e202 0203                              |003a: ushr-int/lit8 v2, v2, #int 3 // #03
 00668c: b621                                   |003c: or-int/2addr v1, v2
 00668e: b510                                   |003d: and-int/2addr v0, v1
 006690: 52b1 3e00                              |003e: iget v1, v11, Lcom/google/android/checkers/a;.e:I // field@003e
 006694: e002 0e04                              |0040: shl-int/lit8 v2, v14, #int 4 // #04
-006698: 1403 0707 0707                         |0042: const v3, #float 0.000000 // #07070707
+006698: 1403 0707 0707                         |0042: const v3, #float 1.01583e-34 // #07070707
 00669e: b5e3                                   |0045: and-int/2addr v3, v14
 0066a0: e003 0305                              |0046: shl-int/lit8 v3, v3, #int 5 // #05
 0066a4: b632                                   |0048: or-int/2addr v2, v3
@@ -6563,7 +6563,7 @@
 0067f6: 3900 5400                              |00f1: if-nez v0, 0145 // +0054
 0067fa: 52b0 3f00                              |00f3: iget v0, v11, Lcom/google/android/checkers/a;.f:I // field@003f
 0067fe: e001 0e04                              |00f5: shl-int/lit8 v1, v14, #int 4 // #04
-006802: 1402 0707 0707                         |00f7: const v2, #float 0.000000 // #07070707
+006802: 1402 0707 0707                         |00f7: const v2, #float 1.01583e-34 // #07070707
 006808: b5e2                                   |00fa: and-int/2addr v2, v14
 00680a: e002 0205                              |00fb: shl-int/lit8 v2, v2, #int 5 // #05
 00680e: b621                                   |00fd: or-int/2addr v1, v2
@@ -6611,7 +6611,7 @@
 0068a2: 52b1 3f00                              |0147: iget v1, v11, Lcom/google/android/checkers/a;.f:I // field@003f
 0068a6: b610                                   |0149: or-int/2addr v0, v1
 0068a8: e001 0e04                              |014a: shl-int/lit8 v1, v14, #int 4 // #04
-0068ac: 1402 0707 0707                         |014c: const v2, #float 0.000000 // #07070707
+0068ac: 1402 0707 0707                         |014c: const v2, #float 1.01583e-34 // #07070707
 0068b2: b5e2                                   |014f: and-int/2addr v2, v14
 0068b4: e002 0205                              |0150: shl-int/lit8 v2, v2, #int 5 // #05
 0068b8: b621                                   |0152: or-int/2addr v1, v2
@@ -6624,7 +6624,7 @@
 0068ce: b5e3                                   |015d: and-int/2addr v3, v14
 0068d0: e203 0305                              |015e: ushr-int/lit8 v3, v3, #int 5 // #05
 0068d4: b632                                   |0160: or-int/2addr v2, v3
-0068d6: 1403 0007 0707                         |0161: const v3, #float 0.000000 // #07070700
+0068d6: 1403 0007 0707                         |0161: const v3, #float 1.01583e-34 // #07070700
 0068dc: b5e3                                   |0164: and-int/2addr v3, v14
 0068de: e203 0303                              |0165: ushr-int/lit8 v3, v3, #int 3 // #03
 0068e2: b632                                   |0167: or-int/2addr v2, v3
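
The churn in checkers.txt above is purely a formatting change in dexdump's expected output: float and double constants are now rendered in a %g-style form with six significant digits instead of fixed-point %f, so bit patterns that are tiny as floats (for example #00008000) print as 4.59177e-41 rather than a meaningless 0.000000, and round values such as 12345.000000 collapse to 12345. A quick way to see the difference with an ordinary printf (illustrative only, not part of the patch):

    printf '%f  %g\n' 12345.0      12345.0        # 12345.000000  12345
    printf '%f  %g\n' 4.59177e-41  4.59177e-41    # 0.000000      4.59177e-41
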
diff --git a/test/dexdump/run-all-tests b/test/dexdump/run-all-tests
index 11ab55a..c9976cd 100755
--- a/test/dexdump/run-all-tests
+++ b/test/dexdump/run-all-tests
@@ -39,7 +39,7 @@
 
 # Set up dexdump binary and flags to test.
 DEXD="${ANDROID_HOST_OUT}/bin/dexdump2"
-DEXDFLAGS1="-dfh"
+DEXDFLAGS1="-adfh"
 DEXDFLAGS2="-e -l xml"
 
 # Set up dexlist binary and flags to test.
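
For reference, DEXDFLAGS1 and DEXDFLAGS2 above are the two flag sets run-all-tests passes to dexdump2 when producing output to compare against the checked-in .txt and .xml expectations; roughly along these lines (a sketch only; the real script loops over every .dex in the directory and also drives dexlist for the .lst files):

    DEXD="${ANDROID_HOST_OUT}/bin/dexdump2"
    "$DEXD" -adfh     values.dex > values.txt   # plain dump (DEXDFLAGS1, now with -a)
    "$DEXD" -e -l xml values.dex > values.xml   # XML layout (DEXDFLAGS2)
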
diff --git a/test/dexdump/staticfields.txt b/test/dexdump/staticfields.txt
index 022605f..f6d8f19 100644
--- a/test/dexdump/staticfields.txt
+++ b/test/dexdump/staticfields.txt
@@ -12,8 +12,8 @@
 string_ids_off      : 112 (0x000070)
 type_ids_size       : 12
 type_ids_off        : 224 (0x0000e0)
-proto_ids_size       : 1
-proto_ids_off        : 272 (0x000110)
+proto_ids_size      : 1
+proto_ids_off       : 272 (0x000110)
 field_ids_size      : 12
 field_ids_off       : 284 (0x00011c)
 method_ids_size     : 2
@@ -71,12 +71,12 @@
       name          : 'test05_public_static_final_float_46_47'
       type          : 'F'
       access        : 0x0019 (PUBLIC STATIC FINAL)
-      value         : 46.470001
+      value         : 46.47
     #6              : (in LStaticFields;)
       name          : 'test06_public_static_final_double_48_49'
       type          : 'D'
       access        : 0x0019 (PUBLIC STATIC FINAL)
-      value         : 48.490000
+      value         : 48.49
     #7              : (in LStaticFields;)
       name          : 'test07_public_static_final_string'
       type          : 'Ljava/lang/String;'
diff --git a/test/dexdump/staticfields.xml b/test/dexdump/staticfields.xml
index c906f0a..9082f0e 100644
--- a/test/dexdump/staticfields.xml
+++ b/test/dexdump/staticfields.xml
@@ -66,7 +66,7 @@
  static="true"
  final="true"
  visibility="public"
- value="46.470001"
+ value="46.47"
 >
 </field>
 <field name="test06_public_static_final_double_48_49"
@@ -76,7 +76,7 @@
  static="true"
  final="true"
  visibility="public"
- value="48.490000"
+ value="48.49"
 >
 </field>
 <field name="test07_public_static_final_string"
@@ -86,7 +86,7 @@
  static="true"
  final="true"
  visibility="public"
- value="abc \>&lt;&quot;'&amp;&#x9;&#xD;&#xA;"
+ value="abc \&gt;&lt;&quot;'&amp;&#x9;&#xD;&#xA;"
 >
 </field>
 <field name="test08_public_static_final_object_null"
diff --git a/test/dexdump/values.dex b/test/dexdump/values.dex
new file mode 100644
index 0000000..84602d0
--- /dev/null
+++ b/test/dexdump/values.dex
Binary files differ
diff --git a/test/dexdump/values.lst b/test/dexdump/values.lst
new file mode 100644
index 0000000..0dbe3a9
--- /dev/null
+++ b/test/dexdump/values.lst
@@ -0,0 +1,3 @@
+#values.dex
+0x000003bc 8 Test <clinit> ()V Test.java 66
+0x000003d4 8 Test <init> ()V Test.java 1
diff --git a/test/dexdump/values.txt b/test/dexdump/values.txt
new file mode 100644
index 0000000..7f831b1
--- /dev/null
+++ b/test/dexdump/values.txt
@@ -0,0 +1,355 @@
+Processing 'values.dex'...
+Opened 'values.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : 7605eec0
+signature           : c197...a065
+file_size           : 1864
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 70
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 12
+type_ids_off        : 392 (0x000188)
+proto_ids_size      : 1
+proto_ids_off       : 440 (0x0001b8)
+field_ids_size      : 54
+field_ids_off       : 452 (0x0001c4)
+method_ids_size     : 3
+method_ids_off      : 884 (0x000374)
+class_defs_size     : 1
+class_defs_off      : 908 (0x00038c)
+data_size           : 924
+data_off            : 940 (0x0003ac)
+
+Class #0 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 7
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 13
+annotations_off     : 0 (0x000000)
+class_data_off      : 1578 (0x00062a)
+static_fields_size  : 54
+instance_fields_size: 0
+direct_methods_size : 2
+virtual_methods_size: 0
+
+Class #0            -
+  Class descriptor  : 'LTest;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LTest;)
+      name          : 'mB0'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #1              : (in LTest;)
+      name          : 'mB1'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 127
+    #2              : (in LTest;)
+      name          : 'mB2'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -128
+    #3              : (in LTest;)
+      name          : 'mB3'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #4              : (in LTest;)
+      name          : 'mC0'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #5              : (in LTest;)
+      name          : 'mC1'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32767
+    #6              : (in LTest;)
+      name          : 'mC2'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32768
+    #7              : (in LTest;)
+      name          : 'mC3'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #8              : (in LTest;)
+      name          : 'mC4'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 65520
+    #9              : (in LTest;)
+      name          : 'mC5'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 65535
+    #10              : (in LTest;)
+      name          : 'mD0'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -inf
+    #11              : (in LTest;)
+      name          : 'mD1'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 4.94066e-324
+    #12              : (in LTest;)
+      name          : 'mD2'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -0
+    #13              : (in LTest;)
+      name          : 'mD3'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #14              : (in LTest;)
+      name          : 'mD4'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 1.79769e+308
+    #15              : (in LTest;)
+      name          : 'mD5'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : inf
+    #16              : (in LTest;)
+      name          : 'mD6'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : nan
+    #17              : (in LTest;)
+      name          : 'mF0'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -inf
+    #18              : (in LTest;)
+      name          : 'mF1'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 1.4013e-45
+    #19              : (in LTest;)
+      name          : 'mF2'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -0
+    #20              : (in LTest;)
+      name          : 'mF3'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #21              : (in LTest;)
+      name          : 'mF4'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 3.40282e+38
+    #22              : (in LTest;)
+      name          : 'mF5'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : inf
+    #23              : (in LTest;)
+      name          : 'mF6'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : nan
+    #24              : (in LTest;)
+      name          : 'mI0'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #25              : (in LTest;)
+      name          : 'mI1'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 2147483647
+    #26              : (in LTest;)
+      name          : 'mI2'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -2147483648
+    #27              : (in LTest;)
+      name          : 'mI3'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #28              : (in LTest;)
+      name          : 'mI4'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #29              : (in LTest;)
+      name          : 'mI5'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4096
+    #30              : (in LTest;)
+      name          : 'mI6'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1048576
+    #31              : (in LTest;)
+      name          : 'mI7'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #32              : (in LTest;)
+      name          : 'mJ0'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #33              : (in LTest;)
+      name          : 'mJ1'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 9223372036854775807
+    #34              : (in LTest;)
+      name          : 'mJ2'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -9223372036854775808
+    #35              : (in LTest;)
+      name          : 'mJ3'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #36              : (in LTest;)
+      name          : 'mJ4'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #37              : (in LTest;)
+      name          : 'mJ5'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4096
+    #38              : (in LTest;)
+      name          : 'mJ6'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1048576
+    #39              : (in LTest;)
+      name          : 'mJ7'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -268435456
+    #40              : (in LTest;)
+      name          : 'mJ8'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -68719476736
+    #41              : (in LTest;)
+      name          : 'mJ9'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -17592186044416
+    #42              : (in LTest;)
+      name          : 'mJa'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -4503599627370496
+    #43              : (in LTest;)
+      name          : 'mJb'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #44              : (in LTest;)
+      name          : 'mObject'
+      type          : 'Ljava/lang/Object;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : null
+    #45              : (in LTest;)
+      name          : 'mS0'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 0
+    #46              : (in LTest;)
+      name          : 'mS1'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 32767
+    #47              : (in LTest;)
+      name          : 'mS2'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -32768
+    #48              : (in LTest;)
+      name          : 'mS3'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 255
+    #49              : (in LTest;)
+      name          : 'mS4'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -16
+    #50              : (in LTest;)
+      name          : 'mS5'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : -1
+    #51              : (in LTest;)
+      name          : 'mString'
+      type          : 'Ljava/lang/String;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : "<&\"JOHO\"&>\n"
+    #52              : (in LTest;)
+      name          : 'mZ0'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : false
+    #53              : (in LTest;)
+      name          : 'mZ1'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : true
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LTest;)
+      name          : '<clinit>'
+      type          : '()V'
+      access        : 0x10008 (STATIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 4 16-bit code units
+0003ac:                                        |[0003ac] Test.<clinit>:()V
+0003bc: 1200                                   |0000: const/4 v0, #int 0 // #0
+0003be: 6900 2c00                              |0001: sput-object v0, LTest;.mObject:Ljava/lang/Object; // field@002c
+0003c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=66
+      locals        : 
+
+    #1              : (in LTest;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0003c4:                                        |[0003c4] Test.<init>:()V
+0003d4: 7010 0200 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0002
+0003da: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=1
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LTest; 
+
+  Virtual methods   -
+  source_file_idx   : 13 (Test.java)
+
diff --git a/test/dexdump/values.xml b/test/dexdump/values.xml
new file mode 100644
index 0000000..d6ba48d
--- /dev/null
+++ b/test/dexdump/values.xml
@@ -0,0 +1,561 @@
+<api>
+<package name=""
+>
+<class name="Test"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<field name="mB0"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mB1"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="127"
+>
+</field>
+<field name="mB2"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-128"
+>
+</field>
+<field name="mB3"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mC0"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mC1"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32767"
+>
+</field>
+<field name="mC2"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32768"
+>
+</field>
+<field name="mC3"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mC4"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="65520"
+>
+</field>
+<field name="mC5"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="65535"
+>
+</field>
+<field name="mD0"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-inf"
+>
+</field>
+<field name="mD1"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="4.94066e-324"
+>
+</field>
+<field name="mD2"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-0"
+>
+</field>
+<field name="mD3"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mD4"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="1.79769e+308"
+>
+</field>
+<field name="mD5"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="inf"
+>
+</field>
+<field name="mD6"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="nan"
+>
+</field>
+<field name="mF0"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-inf"
+>
+</field>
+<field name="mF1"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="1.4013e-45"
+>
+</field>
+<field name="mF2"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-0"
+>
+</field>
+<field name="mF3"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mF4"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="3.40282e+38"
+>
+</field>
+<field name="mF5"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="inf"
+>
+</field>
+<field name="mF6"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="nan"
+>
+</field>
+<field name="mI0"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mI1"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="2147483647"
+>
+</field>
+<field name="mI2"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-2147483648"
+>
+</field>
+<field name="mI3"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mI4"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mI5"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4096"
+>
+</field>
+<field name="mI6"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1048576"
+>
+</field>
+<field name="mI7"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mJ0"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mJ1"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="9223372036854775807"
+>
+</field>
+<field name="mJ2"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-9223372036854775808"
+>
+</field>
+<field name="mJ3"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mJ4"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mJ5"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4096"
+>
+</field>
+<field name="mJ6"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1048576"
+>
+</field>
+<field name="mJ7"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-268435456"
+>
+</field>
+<field name="mJ8"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-68719476736"
+>
+</field>
+<field name="mJ9"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-17592186044416"
+>
+</field>
+<field name="mJa"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-4503599627370496"
+>
+</field>
+<field name="mJb"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mObject"
+ type="java.lang.Object"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="null"
+>
+</field>
+<field name="mS0"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="0"
+>
+</field>
+<field name="mS1"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="32767"
+>
+</field>
+<field name="mS2"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-32768"
+>
+</field>
+<field name="mS3"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="255"
+>
+</field>
+<field name="mS4"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-16"
+>
+</field>
+<field name="mS5"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="-1"
+>
+</field>
+<field name="mString"
+ type="java.lang.String"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="&lt;&amp;&quot;JOHO&quot;&amp;&gt;&#xA;"
+>
+</field>
+<field name="mZ0"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="false"
+>
+</field>
+<field name="mZ1"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="true"
+>
+</field>
+<constructor name="Test"
+ type="Test"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+</class>
+</package>
+</api>
diff --git a/test/etc/default-build b/test/etc/default-build
index 962ae38..37ce0f2 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -64,6 +64,9 @@
 SKIP_DX_MERGER="false"
 EXPERIMENTAL=""
 
+# The key for default arguments if no experimental things are enabled.
+DEFAULT_EXPERIMENT="no-experiment"
+
 # Setup experimental flag mappings in a bash associative array.
 declare -A JACK_EXPERIMENTAL_ARGS
 JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
@@ -72,6 +75,11 @@
 declare -A SMALI_EXPERIMENTAL_ARGS
 SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
 
+declare -A JAVAC_EXPERIMENTAL_ARGS
+JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS["lambdas"]="-source 1.8 -target 1.8"
+JAVAC_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-source 1.7 -target 1.7"
+
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
@@ -100,6 +108,8 @@
     shift
   elif [ "x$1" = "x--experimental" ]; then
     shift
+    # We have a specific experimental configuration so don't use the default.
+    DEFAULT_EXPERIMENT=""
     EXPERIMENTAL="${EXPERIMENTAL} $1"
     shift
   elif expr "x$1" : "x--" >/dev/null 2>&1; then
@@ -110,10 +120,14 @@
   fi
 done
 
+# Be sure to get any default arguments if not doing any experiments.
+EXPERIMENTAL="${EXPERIMENTAL} ${DEFAULT_EXPERIMENT}"
+
 # Add args from the experimental mappings.
 for experiment in ${EXPERIMENTAL}; do
   JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
   SMALI_ARGS="${SMALI_ARGS} ${SMALI_EXPERIMENTAL_ARGS[${experiment}]}"
+  JAVAC_ARGS="${JAVAC_ARGS} ${JAVAC_EXPERIMENTAL_ARGS[${experiment}]}"
 done
 
 if [ -e classes.dex ]; then
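
The default-experiment plumbing added above means a test built without any --experimental flag now gets an explicit -source 1.7 -target 1.7 javac invocation, while passing --experimental clears the default so only the requested mapping applies. Condensed sketch of the flow for the no-experiment case (bash, simplified; the real loop also appends JACK_ARGS and SMALI_ARGS):

    declare -A JAVAC_EXPERIMENTAL_ARGS
    JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
    JAVAC_EXPERIMENTAL_ARGS["no-experiment"]="-source 1.7 -target 1.7"

    DEFAULT_EXPERIMENT="no-experiment"             # emptied if --experimental is seen
    EXPERIMENTAL=""
    EXPERIMENTAL="${EXPERIMENTAL} ${DEFAULT_EXPERIMENT}"
    for experiment in ${EXPERIMENTAL}; do
      JAVAC_ARGS="${JAVAC_ARGS} ${JAVAC_EXPERIMENTAL_ARGS[${experiment}]}"
    done
    echo "javac${JAVAC_ARGS} Main.java"            # javac -source 1.7 -target 1.7 Main.java
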
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index a02fe95..d12bd79 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -21,6 +21,7 @@
 EXPERIMENTAL=""
 FALSE_BIN="/system/bin/false"
 FLAGS=""
+ANDROID_FLAGS=""
 GDB=""
 GDB_ARGS=""
 GDB_SERVER="gdbserver"
@@ -42,9 +43,9 @@
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
 if [ "$ART_USE_READ_BARRIER" = "true" ]; then
-  TIME_OUT_VALUE=900  # 15 minutes.
+  TIME_OUT_VALUE=1800  # 30 minutes.
 else
-  TIME_OUT_VALUE=600  # 10 minutes.
+  TIME_OUT_VALUE=1200  # 20 minutes.
 fi
 USE_GDB="n"
 USE_JVM="n"
@@ -59,6 +60,9 @@
     if [ "x$1" = "x--quiet" ]; then
         QUIET="y"
         shift
+    elif [ "x$1" = "x-O" ]; then
+        # Ignore this option.
+        shift
     elif [ "x$1" = "x--lib" ]; then
         shift
         if [ "x$1" = "x" ]; then
@@ -93,6 +97,11 @@
         FLAGS="${FLAGS} -Xcompiler-option $option"
         COMPILE_FLAGS="${COMPILE_FLAGS} $option"
         shift
+    elif [ "x$1" = "x--android-runtime-option" ]; then
+        shift
+        option="$1"
+        ANDROID_FLAGS="${ANDROID_FLAGS} $option"
+        shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
         option="$1"
@@ -199,6 +208,10 @@
         shift
         INSTRUCTION_SET_FEATURES="$1"
         shift
+    elif [ "x$1" = "x--timeout" ]; then
+        shift
+        TIME_OUT_VALUE="$1"
+        shift
     elif [ "x$1" = "x--" ]; then
         shift
         break
@@ -229,6 +242,7 @@
 done
 
 if [ "$USE_JVM" = "n" ]; then
+    FLAGS="${FLAGS} ${ANDROID_FLAGS}"
     for feature in ${EXPERIMENTAL}; do
         FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}"
         COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}"
@@ -323,11 +337,14 @@
 if [ "$INTERPRETER" = "y" ]; then
     INT_OPTS="-Xint"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
     elif [ "$VERIFY" = "s" ]; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail"
     else # VERIFY = "n"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
@@ -336,18 +353,12 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
-      if [ "$PREBUILD" = "n" ]; then
-        # Make sure that if we have noprebuild we still JIT as DexClassLoader will
-        # try to compile the dex file.
-        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
-      fi
     else
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
-      if [ "$PREBUILD" = "n" ]; then
-        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
-      fi
     fi
 fi
 
@@ -383,14 +394,14 @@
 fi
 
 dex2oat_cmdline="true"
-mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 
 # Pick a base that will force the app image to get relocated.
 app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
 
 if [ "$PREBUILD" = "y" ]; then
-  mkdir_cmdline="${mkdir_cmdline} && mkdir -p ${DEX_LOCATION}/oat/$ISA"
+  mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
@@ -468,12 +479,12 @@
       adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
     fi
 
-    LD_LIBRARY_PATH=
+    LD_LIBRARY_PATH=/data/art-test/$ISA
     if [ "$ANDROID_ROOT" != "/system" ]; then
       # Current default installation is dalvikvm 64bits and dex2oat 32bits,
       # so we can only use LD_LIBRARY_PATH when testing on a local
       # installation.
-      LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY
+      LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY:$LD_LIBRARY_PATH
     fi
 
     PUBLIC_LIBS=libart.so:libartd.so
@@ -486,7 +497,7 @@
              export ANDROID_ADDITIONAL_PUBLIC_LIBRARIES=$PUBLIC_LIBS && \
              export DEX_LOCATION=$DEX_LOCATION && \
              export ANDROID_ROOT=$ANDROID_ROOT && \
-             $mkdir_cmdline && \
+             mkdir -p ${mkdir_locations} && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
@@ -552,22 +563,20 @@
     if [ "$TIME_OUT" = "timeout" ]; then
       # Add timeout command if time out is desired.
       #
-      # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
-      #       will induce a full thread dump before abort. However, dumping threads might deadlock,
-      #       so the outer timeout sends the regular SIGTERM after an additional minute to ensure
-      #       termination (without dumping all threads).
-      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60))
-      cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
+      # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump
+      #       before abort. However, dumping threads might deadlock, so we also use the "-k"
+      #       option to definitely kill the child.
+      cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      echo "$mkdir_cmdline && $dex2oat_cmdline && $strip_cmdline && $cmdline"
+      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
 
     rm -rf ${DEX_LOCATION}/dalvik-cache/
-    $mkdir_cmdline || exit 1
+    mkdir -p ${mkdir_locations} || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
diff --git a/test/run-test b/test/run-test
index ce8c010..4c29420 100755
--- a/test/run-test
+++ b/test/run-test
@@ -37,11 +37,11 @@
 if [ -z "$TMPDIR" ]; then
   tmp_dir="/tmp/$USER/${test_dir}"
 else
-  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+  tmp_dir="${TMPDIR}/${test_dir}"
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
-export JAVAC="javac -g -source 1.7 -target 1.7 -Xlint:-options"
+export JAVAC="javac -g -Xlint:-options"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
@@ -78,9 +78,14 @@
     export ANDROID_BUILD_TOP=$oldwd
 fi
 
+# ANDROID_HOST_OUT is not set in a build environment.
+if [ -z "$ANDROID_HOST_OUT" ]; then
+    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
+fi
+
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
 if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
+  export JACK_CLASSPATH="${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
 export JACK="$JACK -g -cp $JACK_CLASSPATH"
@@ -122,10 +127,12 @@
 have_dex2oat="yes"
 have_patchoat="yes"
 have_image="yes"
-image_suffix=""
 pic_image_suffix=""
 multi_image_suffix=""
 android_root="/system"
+# By default we will use optimizing.
+image_args=""
+image_suffix="-optimizing"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
@@ -148,6 +155,7 @@
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
+        image_args=""
         prebuild_mode="no"
         NEED_DEX="false"
         USE_JACK="false"
@@ -157,6 +165,7 @@
     elif [ "x$1" = "x-O" ]; then
         lib="libart.so"
         testlib="arttest"
+        run_args="${run_args} -O"
         shift
     elif [ "x$1" = "x--dalvik" ]; then
         lib="libdvm.so"
@@ -238,28 +247,28 @@
         shift
     elif [ "x$1" = "x--strace" ]; then
         strace="yes"
-        run_args="${run_args} --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
+        run_args="${run_args} --timeout 1800 --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
         shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
         shift
     elif [ "x$1" = "x--interpreter" ]; then
-        run_args="${run_args} --interpreter --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        run_args="${run_args} --interpreter"
         image_suffix="-interpreter"
         shift
     elif [ "x$1" = "x--jit" ]; then
-        run_args="${run_args} --jit --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        image_args="--jit"
         image_suffix="-jit"
         shift
     elif [ "x$1" = "x--optimizing" ]; then
-        run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
+        image_args="-Xcompiler-option --compiler-backend=Optimizing"
         image_suffix="-optimizing"
         shift
     elif [ "x$1" = "x--no-verify" ]; then
-        run_args="${run_args} --no-verify --runtime-option -XOatFileManagerCompilerFilter:verify-none"
+        run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--verify-soft-fail" ]; then
-        run_args="${run_args} --verify-soft-fail --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        image_args="--verify-soft-fail"
         image_suffix="-interp-ac"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
@@ -348,6 +357,7 @@
     fi
 done
 
+run_args="${run_args} ${image_args}"
 # Allocate file descriptor real_stderr and redirect it to the shell's error
 # output (fd 2).
 if [ ${BASH_VERSINFO[1]} -ge 4 ] && [ ${BASH_VERSINFO[2]} -ge 1 ]; then
@@ -389,7 +399,7 @@
   run_args="${run_args} --runtime-option -Xgc:preverify_rosalloc --runtime-option -Xgc:postverify_rosalloc"
 fi
 if [ "$gc_stress" = "true" ]; then
-  run_args="${run_args} --gc-stress --runtime-option -Xgc:SS,gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
+  run_args="${run_args} --gc-stress --runtime-option -Xgc:gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
 fi
 if [ "$trace" = "true" ]; then
     run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file-size:2000000"
@@ -458,10 +468,6 @@
     fi
 elif [ "$runtime" = "art" ]; then
     if [ "$target_mode" = "no" ]; then
-        # ANDROID_HOST_OUT is not set in a build environment.
-        if [ -z "$ANDROID_HOST_OUT" ]; then
-            export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
-        fi
         guess_host_arch_name
         run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
@@ -904,4 +910,8 @@
 
 ) 2>&${real_stderr} 1>&2
 
-exit 1
+if [ "$never_clean" = "yes" ] && [ "$good" = "yes" ]; then
+  exit 0
+else
+  exit 1
+fi
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
new file mode 100644
index 0000000..ed280e4
--- /dev/null
+++ b/test/ti-agent/common_load.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdio.h>
+// TODO: Figure out the proper include path for this header.
+#include "openjdkjvmti/jvmti.h"
+
+#include "art_method-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+
+#include "901-hello-ti-agent/basics.h"
+
+namespace art {
+
+using OnLoad   = jint (*)(JavaVM* vm, char* options, void* reserved);
+using OnAttach = jint (*)(JavaVM* vm, char* options, void* reserved);
+
+struct AgentLib {
+  const char* name;
+  OnLoad load;
+  OnAttach attach;
+};
+
+// A list of all the agents we have for testing.
+AgentLib agents[] = {
+  { "901-hello-ti-agent", Test901HelloTi::OnLoad, nullptr },
+};
+
+static AgentLib* FindAgent(char* name) {
+  for (AgentLib& l : agents) {
+    if (strncmp(l.name, name, strlen(l.name)) == 0) {
+      return &l;
+    }
+  }
+  return nullptr;
+}
+
+static bool FindAgentNameAndOptions(char* options,
+                                    /*out*/char** name,
+                                    /*out*/char** other_options) {
+  // Name is the first element.
+  *name = options;
+  char* rest = options;
+  // name is the first thing in the options
+  while (*rest != '\0' && *rest != ',') {
+    rest++;
+  }
+  if (*rest == ',') {
+    *rest = '\0';
+    rest++;
+  }
+  *other_options = rest;
+  return true;
+}
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
+  char* remaining_options = nullptr;
+  char* name_option = nullptr;
+  if (!FindAgentNameAndOptions(options, &name_option, &remaining_options)) {
+    printf("Unable to find agent name in options: %s\n", options);
+    return -1;
+  }
+  AgentLib* lib = FindAgent(name_option);
+  if (lib == nullptr) {
+    printf("Unable to find agent named: %s, add it to the list in test/ti-agent/common_load.cc\n",
+           name_option);
+    return -2;
+  }
+  if (lib->load == nullptr) {
+    printf("agent: %s does not include an OnLoad method.\n", name_option);
+    return -3;
+  }
+  return lib->load(vm, remaining_options, reserved);
+}
+
+
+extern "C" JNIEXPORT jint JNICALL Agent_OnAttach(JavaVM* vm, char* options, void* reserved) {
+  char* remaining_options = nullptr;
+  char* name_option = nullptr;
+  if (!FindAgentNameAndOptions(options, &name_option, &remaining_options)) {
+    printf("Unable to find agent name in options: %s\n", options);
+    return -1;
+  }
+  AgentLib* lib = FindAgent(name_option);
+  if (lib == nullptr) {
+    printf("Unable to find agent named: %s, add it to the list in test/ti-agent/common_load.cc\n",
+           name_option);
+    return -2;
+  }
+  if (lib->attach == nullptr) {
+    printf("agent: %s does not include an OnAttach method.\n", name_option);
+    return -3;
+  }
+  return lib->attach(vm, remaining_options, reserved);
+}
+
+}  // namespace art
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
new file mode 100644
index 0000000..7ae6d53
--- /dev/null
+++ b/test/valgrind-target-suppressions.txt
@@ -0,0 +1,52 @@
+# Valgrind does not recognize the ashmem ioctl() calls on ARM64, so it assumes that a size
+# parameter is a pointer.
+{
+   ashmem ioctl
+   Memcheck:Param
+   ioctl(generic)
+   ...
+   fun:ioctl
+   fun:ashmem_create_region
+}
+
+# It seems that on ARM64 Valgrind considers the canary value used by the Clang stack protector to
+# be an uninitialized value.
+{
+   jemalloc chunk_alloc_cache
+   Memcheck:Cond
+   fun:je_chunk_alloc_cache
+}
+
+# The VectorImpl class does not hold a pointer to the allocated SharedBuffer structure, but to the
+# beginning of the data, which is effectively an interior pointer. Valgrind has limitations when
+# dealing with interior pointers.
+{
+   VectorImpl
+   Memcheck:Leak
+   match-leak-kinds:possible
+   fun:malloc
+   # The wildcards make this rule work both for 32-bit and 64-bit environments.
+   fun:_ZN7android12SharedBuffer5allocE?
+   fun:_ZN7android10VectorImpl5_growE??
+}
+
+# Clang/LLVM uses memcpy for *x = *y, even though x == y (which is undefined behavior). Ignore.
+# b/29279679, https://llvm.org/bugs/show_bug.cgi?id=11763
+{
+   MemCpySelfAssign
+   Memcheck:Overlap
+   fun:memcpy
+   fun:je_tsd_set
+   fun:je_tsd_fetch
+   fun:je_malloc_tsd_boot0
+}
+
+# Setenv is known to leak when overwriting mappings. This is triggered by re-initializing
+# ANDROID_DATA. Ignore all setenv leaks.
+{
+   SetenvAndroidDataReinit
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:malloc
+   fun:setenv
+}
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index cfbafde..60e0cd8 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -30,6 +30,10 @@
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE := ahat
+
+# Let users with Java 7 run ahat (b/28303627)
+LOCAL_JAVA_LANGUAGE_VERSION := 1.7
+
 include $(BUILD_HOST_JAVA_LIBRARY)
 
 # --- ahat script ----------------
@@ -66,9 +70,10 @@
 
 # Run ahat-test-dump.jar to generate test-dump.hprof
 AHAT_TEST_DUMP_DEPENDENCIES := \
-	$(ART_HOST_EXECUTABLES) \
-	$(HOST_OUT_EXECUTABLES)/art \
-	$(HOST_CORE_IMG_OUT_BASE)-optimizing-pic$(CORE_IMG_SUFFIX)
+  $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES) \
+  $(HOST_OUT_EXECUTABLES)/art \
+  $(HOST_CORE_IMG_OUT_BASE)-optimizing-pic$(CORE_IMG_SUFFIX)
 
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_ART := $(HOST_OUT_EXECUTABLES)/art
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_DUMP_JAR := $(AHAT_TEST_DUMP_JAR)
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index d9b26bc..ecf9e53 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -9,6 +9,9 @@
        Serve pages on the given port. Defaults to 7100.
 
 TODO:
+ * Show GC Root paths.
+ * Have a way to diff two heap dumps.
+
  * Add more tips to the help page.
    - Recommend how to start looking at a heap dump.
    - Say how to enable allocation sites.
@@ -36,11 +39,6 @@
    ignored or not?  Is there any interest in what's unreachable, or is it only
    reachable objects that people care about?
 
- * [low priority] Have a way to diff two heap dumps by site.
-   This should be pretty easy to do, actually. The interface is the real
-   question. Maybe: augment each byte count field on every page with the diff
-   if a baseline has been provided, and allow the user to sort by the diff.
-
 Things to Test:
  * That we can open a hprof without an 'app' heap and show a tabulation of
    objects normally sorted by 'app' heap by default.
@@ -77,7 +75,19 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
- 0.4 Pending
+ 0.8 Pending
+
+ 0.7 Aug 16, 2016
+   Launch ahat server before processing the heap dump.
+   Target Java 1.7.
+
+ 0.6 Jun 21, 2016
+   Add support for proguard deobfuscation (pending AOSP push of perflib)
+
+ 0.5 Apr 19, 2016
+   Update perflib to perflib-25.0.0 to improve processing performance.
+
+ 0.4 Feb 23, 2016
    Annotate char[] objects with their string values.
    Show registered native allocations for heap dumps that support it.
 
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 2adec6f..a8205c7 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -16,6 +16,7 @@
 
 package com.android.ahat;
 
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
@@ -24,9 +25,11 @@
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
-import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
-import com.google.common.collect.Iterables;
+
 import com.google.common.collect.Lists;
+
+import gnu.trove.TObjectProcedure;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -83,51 +86,61 @@
     mSnapshot = snapshot;
     mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps());
 
-    ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
+    final ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
-      long total = 0;
-      for (Instance inst : Iterables.concat(heap.getClasses(), heap.getInstances())) {
-        Instance dominator = inst.getImmediateDominator();
-        if (dominator != null) {
-          total += inst.getSize();
+      // Use a single element array for the total to act as a reference to a
+      // long.
+      final long[] total = new long[]{0};
+      TObjectProcedure<Instance> processInstance = new TObjectProcedure<Instance>() {
+        @Override
+        public boolean execute(Instance inst) {
+          Instance dominator = inst.getImmediateDominator();
+          if (dominator != null) {
+            total[0] += inst.getSize();
 
-          if (dominator == Snapshot.SENTINEL_ROOT) {
-            mRooted.add(inst);
-          }
+            if (dominator == Snapshot.SENTINEL_ROOT) {
+              mRooted.add(inst);
+            }
 
-          // Properly label the class of a class object.
-          if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
-              inst.setClassId(javaLangClass.getId());
-          }
+            // Properly label the class of a class object.
+            if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
+                inst.setClassId(javaLangClass.getId());
+            }
 
-          // Update dominated instances.
-          List<Instance> instances = mDominated.get(dominator);
-          if (instances == null) {
-            instances = new ArrayList<Instance>();
-            mDominated.put(dominator, instances);
-          }
-          instances.add(inst);
+            // Update dominated instances.
+            List<Instance> instances = mDominated.get(dominator);
+            if (instances == null) {
+              instances = new ArrayList<Instance>();
+              mDominated.put(dominator, instances);
+            }
+            instances.add(inst);
 
-          // Update sites.
-          List<StackFrame> path = Collections.emptyList();
-          StackTrace stack = getStack(inst);
-          int stackId = getStackTraceSerialNumber(stack);
-          if (stack != null) {
-            StackFrame[] frames = getStackFrames(stack);
-            if (frames != null && frames.length > 0) {
-              path = Lists.reverse(Arrays.asList(frames));
+            // Update sites.
+            List<StackFrame> path = Collections.emptyList();
+            StackTrace stack = getStack(inst);
+            int stackId = getStackTraceSerialNumber(stack);
+            if (stack != null) {
+              StackFrame[] frames = getStackFrames(stack);
+              if (frames != null && frames.length > 0) {
+                path = Lists.reverse(Arrays.asList(frames));
+              }
+            }
+            mRootSite.add(stackId, 0, path.iterator(), inst);
+
+            // Update native allocations.
+            InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
+            if (alloc != null) {
+              mNativeAllocations.add(alloc);
             }
           }
-          mRootSite.add(stackId, 0, path.iterator(), inst);
-
-          // Update native allocations.
-          InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
-          if (alloc != null) {
-            mNativeAllocations.add(alloc);
-          }
+          return true;
         }
+      };
+      for (Instance instance : heap.getClasses()) {
+        processInstance.execute(instance);
       }
-      mHeapSizes.put(heap, total);
+      heap.forEachInstance(processInstance);
+      mHeapSizes.put(heap, total[0]);
     }
 
     // Record the roots and their types.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index d7b64e2..8769d11 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -19,9 +19,10 @@
 import com.android.tools.perflib.heap.ArrayInstance;
 import com.android.tools.perflib.heap.ClassInstance;
 import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Type;
+
 import java.awt.image.BufferedImage;
 
 /**
@@ -42,11 +43,11 @@
    * Returns null if the instance is not a byte array.
    */
   private static byte[] asByteArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
+    if (!(inst instanceof ArrayInstance)) {
       return null;
     }
 
-    ArrayInstance array = (ArrayInstance)inst;
+    ArrayInstance array = (ArrayInstance) inst;
     if (array.getArrayType() != Type.BYTE) {
       return null;
     }
@@ -54,7 +55,7 @@
     Object[] objs = array.getValues();
     byte[] bytes = new byte[objs.length];
     for (int i = 0; i < objs.length; i++) {
-      Byte b = (Byte)objs[i];
+      Byte b = (Byte) objs[i];
       bytes[i] = b.byteValue();
     }
     return bytes;
@@ -94,9 +95,7 @@
       return null;
     }
 
-    // TODO: When perflib provides a better way to get the length of the
-    // array, we should use that here.
-    int numChars = chars.getValues().length;
+    int numChars = chars.getLength();
     int count = getIntField(inst, "count", numChars);
     if (count == 0) {
       return "";
@@ -143,10 +142,10 @@
     int[] abgr = new int[height * width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int)buffer[i * 4 + 3] & 0xFF) << 24) +
-          (((int)buffer[i * 4 + 0] & 0xFF) << 16) +
-          (((int)buffer[i * 4 + 1] & 0xFF) << 8) +
-          ((int)buffer[i * 4 + 2] & 0xFF));
+          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
@@ -185,7 +184,7 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return (Instance)value;
+    return (Instance) value;
   }
 
   /**
@@ -199,7 +198,7 @@
     if (!(value instanceof Integer)) {
       return def;
     }
-    return (Integer)value;
+    return (Integer) value;
   }
 
   /**
@@ -213,7 +212,7 @@
     if (!(value instanceof Long)) {
       return def;
     }
-    return (Long)value;
+    return (Long) value;
   }
 
   /**
@@ -226,7 +225,7 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return asByteArray((Instance)value);
+    return asByteArray((Instance) value);
   }
 
   // Return the bitmap instance associated with this object, or null if there
@@ -243,9 +242,9 @@
     }
 
     if (inst instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance)inst;
-      if (array.getArrayType() == Type.BYTE && inst.getHardReferences().size() == 1) {
-        Instance ref = inst.getHardReferences().get(0);
+      ArrayInstance array = (ArrayInstance) inst;
+      if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
+        Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
         if (clsref != null && "android.graphics.Bitmap".equals(clsref.getClassName())) {
           return ref;
@@ -323,10 +322,10 @@
     // Note: We know inst as an instance of ClassInstance because we already
     // read the nativePtr field from it.
     Instance registry = null;
-    for (ClassInstance.FieldValue field : ((ClassInstance)inst).getValues()) {
+    for (ClassInstance.FieldValue field : ((ClassInstance) inst).getValues()) {
       Object fieldValue = field.getValue();
       if (fieldValue instanceof Instance) {
-        Instance fieldInst = (Instance)fieldValue;
+        Instance fieldInst = (Instance) fieldValue;
         if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
           registry = fieldInst;
           break;
@@ -344,7 +343,7 @@
     }
 
     Instance referent = null;
-    for (Instance ref : inst.getHardReferences()) {
+    for (Instance ref : inst.getHardReverseReferences()) {
       if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
         referent = InstanceUtils.getReferent(ref);
         if (referent != null) {
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index d784599..fdc5a86 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -67,12 +67,14 @@
       return;
     }
 
-    System.out.println("Processing hprof file...");
-    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
-
+    // Launch the server before parsing the hprof file so we get
+    // BindExceptions quickly.
     InetAddress loopback = InetAddress.getLoopbackAddress();
     InetSocketAddress addr = new InetSocketAddress(loopback, port);
     HttpServer server = HttpServer.create(addr, 0);
+
+    System.out.println("Processing hprof file...");
+    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
     server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
     server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
     server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 06023da..4df1be5 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -160,11 +160,11 @@
   private static void printReferences(
       Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
-    if (inst.getHardReferences().isEmpty()) {
+    if (inst.getHardReverseReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getHardReferences();
+      List<Instance> references = inst.getHardReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));
@@ -173,10 +173,10 @@
       selector.render(doc);
     }
 
-    if (inst.getSoftReferences() != null) {
+    if (inst.getSoftReverseReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getSoftReferences();
+      List<Instance> references = inst.getSoftReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));
diff --git a/tools/ahat/src/Site.java b/tools/ahat/src/Site.java
index d504096..dbb84f6 100644
--- a/tools/ahat/src/Site.java
+++ b/tools/ahat/src/Site.java
@@ -20,6 +20,7 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.StackFrame;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
index c5f89c3..8a3d9f2 100644
--- a/tools/ahat/src/Sort.java
+++ b/tools/ahat/src/Sort.java
@@ -16,13 +16,14 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.List;
 import java.util.Iterator;
+import java.util.List;
 
 /**
  * Provides Comparators and helper functions for sorting Instances, Sites, and
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 368b744..cac53c5 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.4
+Implementation-Version: 0.7
 Main-Class: com.android.ahat.Main
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 2eb52bc..12e0338 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests ${out_dir}/host/linux-x86/bin/jack"
+common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
@@ -46,9 +46,14 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets"
+  make_command+=" ${out_dir}/host/linux-x86/lib/libjavacoretests.so "
+  make_command+=" ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb libstdc++"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets"
+  make_command+=" libjavacrypto libjavacoretests libnetd_client linker toybox toolbox sh"
+  make_command+=" ${out_dir}/host/linux-x86/bin/adb libstdc++ "
+  make_command+=" ${out_dir}/target/product/${TARGET_PRODUCT}/system/etc/public.libraries.txt"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/cpp-define-generator/Android.mk b/tools/cpp-define-generator/Android.mk
new file mode 100644
index 0000000..6ba643c
--- /dev/null
+++ b/tools/cpp-define-generator/Android.mk
@@ -0,0 +1,34 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.executable.mk
+
+CPP_DEFINE_GENERATOR_SRC_FILES := \
+  main.cc
+
+CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES :=
+CPP_DEFINE_GENERATOR_EXTRA_INCLUDE :=
+CPP_DEFINE_GENERATOR_MULTILIB :=
+
+# Build a "data" binary which will hold all the symbol values that will be parsed by the other scripts.
+#
+# Builds are for host only; target-specific define generation is possible but trickier and would need extra tooling.
+#
+# In the future we may wish to parameterize this on (32,64)x(read_barrier,no_read_barrier).
+$(eval $(call build-art-executable,cpp-define-generator-data,$(CPP_DEFINE_GENERATOR_SRC_FILES),$(CPP_DEFINE_GENERATOR_EXTRA_SHARED_LIBRARIES),$(CPP_DEFINE_GENERATOR_EXTRA_INCLUDE),host,debug,$(CPP_DEFINE_GENERATOR_MULTILIB),shared))
+
diff --git a/tools/cpp-define-generator/common.def b/tools/cpp-define-generator/common.def
new file mode 100644
index 0000000..76c64c9
--- /dev/null
+++ b/tools/cpp-define-generator/common.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Convenience macro to define an offset expression.
+
+#ifndef DEFINE_OFFSET_EXPR
+#define DEFINE_OFFSET_EXPR(holder_type, field_name, field_type, expr) \
+  DEFINE_EXPR(holder_type ## _ ## field_name ## _OFFSET, field_type, expr)
+#define DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#endif
+
diff --git a/tools/cpp-define-generator/common_undef.def b/tools/cpp-define-generator/common_undef.def
new file mode 100644
index 0000000..c44aba7
--- /dev/null
+++ b/tools/cpp-define-generator/common_undef.def
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR_STANDARD_DEFINITION
+#undef DEFINE_OFFSET_EXPR
+#endif
diff --git a/tools/cpp-define-generator/constant_class.def b/tools/cpp-define-generator/constant_class.def
new file mode 100644
index 0000000..58372f9
--- /dev/null
+++ b/tools/cpp-define-generator/constant_class.def
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/class.h"         // kStatusInitialized
+#include "modifiers.h"            // kAccClassIsFinalizable
+#include "base/bit_utils.h"       // MostSignificantBit
+#endif
+
+#define DEFINE_FLAG_OFFSET(type_name, field_name, expr) \
+  DEFINE_EXPR(type_name ## _ ## field_name, uint32_t, (expr))
+
+DEFINE_FLAG_OFFSET(MIRROR_CLASS, STATUS_INITIALIZED,       art::mirror::Class::kStatusInitialized)
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE,     art::kAccClassIsFinalizable)
+// TODO: We should really have a BitPosition which also checks it's a power of 2.
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE_BIT, art::MostSignificantBit(art::kAccClassIsFinalizable))
+
+#undef DEFINE_FLAG_OFFSET
diff --git a/tools/cpp-define-generator/constant_dexcache.def b/tools/cpp-define-generator/constant_dexcache.def
new file mode 100644
index 0000000..fd197f2
--- /dev/null
+++ b/tools/cpp-define-generator/constant_dexcache.def
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/dex_cache.h"   // art::mirror::DexCache, StringDexCachePair
+#endif
+
+DEFINE_EXPR(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT,       int32_t, art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))
+DEFINE_EXPR(STRING_DEX_CACHE_SIZE_MINUS_ONE,           int32_t, art::mirror::DexCache::kDexCacheStringCacheSize - 1)
+DEFINE_EXPR(STRING_DEX_CACHE_HASH_BITS,                int32_t,
+    art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))
\ No newline at end of file
diff --git a/tools/cpp-define-generator/constant_globals.def b/tools/cpp-define-generator/constant_globals.def
new file mode 100644
index 0000000..a3ccc72
--- /dev/null
+++ b/tools/cpp-define-generator/constant_globals.def
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export global values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "globals.h"         // art::kObjectAlignment
+#endif
+
+#define DEFINE_OBJECT_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(OBJECT_ ## macro_name, type, constant_field_name)
+
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK,         size_t,   art::kObjectAlignment - 1)
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED, uint32_t, ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED64, uint64_t, ~static_cast<uint64_t>(art::kObjectAlignment - 1))
+
+#undef DEFINE_OBJECT_EXPR
+
diff --git a/tools/cpp-define-generator/constant_heap.def b/tools/cpp-define-generator/constant_heap.def
new file mode 100644
index 0000000..dc76736
--- /dev/null
+++ b/tools/cpp-define-generator/constant_heap.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export heap values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/heap.h"
+#endif
+
+// Minimum object size at which an allocation is treated as a large object.
+DEFINE_EXPR(MIN_LARGE_OBJECT_THRESHOLD, size_t, art::gc::Heap::kMinLargeObjectThreshold)
+
diff --git a/tools/cpp-define-generator/constant_jit.def b/tools/cpp-define-generator/constant_jit.def
new file mode 100644
index 0000000..5fa5194
--- /dev/null
+++ b/tools/cpp-define-generator/constant_jit.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within jit.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "jit/jit.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_JIT_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(JIT_ ## macro_name, type, (expr))
+
+DEFINE_JIT_CONSTANT(CHECK_OSR,       int16_t, art::jit::kJitCheckForOSR)
+DEFINE_JIT_CONSTANT(HOTNESS_DISABLE, int16_t, art::jit::kJitHotnessDisabled)
+
+#undef DEFINE_JIT_CONSTANT
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
new file mode 100644
index 0000000..67ed5b5
--- /dev/null
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export lockword values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "lock_word.h"         // art::LockWord
+#endif
+
+#define DEFINE_LOCK_WORD_EXPR(macro_name, type, constant_field_name) \
+  DEFINE_EXPR(LOCK_WORD_ ## macro_name, type, art::LockWord::constant_field_name)
+
+DEFINE_LOCK_WORD_EXPR(STATE_SHIFT,               int32_t,  kStateShift)
+DEFINE_LOCK_WORD_EXPR(STATE_MASK,                uint32_t, kStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_SHIFT,  int32_t,  kReadBarrierStateShift)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK,   uint32_t,  kReadBarrierStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
+
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,   uint32_t,  kGCStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,   int32_t,  kGCStateShift)
+
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT, int32_t, kMarkBitStateShift)
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED, uint32_t, kMarkBitStateMaskShifted)
+
+#undef DEFINE_LOCK_WORD_EXPR
+
diff --git a/tools/cpp-define-generator/constant_reference.def b/tools/cpp-define-generator/constant_reference.def
new file mode 100644
index 0000000..d312f76
--- /dev/null
+++ b/tools/cpp-define-generator/constant_reference.def
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"            // mirror::Object
+#include "stack.h"                    // StackReference
+#include "mirror/object_reference.h"  // mirror::CompressedReference
+#include "base/bit_utils.h"           // WhichPowerOf2
+#endif
+
+// Size of references to the heap on the stack.
+DEFINE_EXPR(STACK_REFERENCE_SIZE,            size_t, sizeof(art::StackReference<art::mirror::Object>))
+// Size of heap references
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE,       size_t, sizeof(art::mirror::CompressedReference<art::mirror::Object>))
+DEFINE_EXPR(COMPRESSED_REFERENCE_SIZE_SHIFT, size_t, art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))
+
+#undef DEFINE_REFERENCE_OFFSET
diff --git a/tools/cpp-define-generator/constant_rosalloc.def b/tools/cpp-define-generator/constant_rosalloc.def
new file mode 100644
index 0000000..2007cef
--- /dev/null
+++ b/tools/cpp-define-generator/constant_rosalloc.def
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within RosAlloc.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/allocator/rosalloc.h"   // art::gc::allocator::RosAlloc
+#endif
+
+#define DEFINE_ROSALLOC_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(ROSALLOC_ ## macro_name, type, (expr))
+
+DEFINE_ROSALLOC_CONSTANT(MAX_THREAD_LOCAL_BRACKET_SIZE, int32_t, art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize)
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_SHIFT,    int32_t, art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSizeShift)
+// TODO: This should be a BitUtils helper, e.g. BitMaskFromSize or something like that.
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK,     int32_t, static_cast<int32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED32,\
+                                                        uint32_t, ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(BRACKET_QUANTUM_SIZE_MASK_TOGGLED64,\
+                                                        uint64_t, ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kThreadLocalBracketQuantumSize - 1))
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_OFFSET,          int32_t, art::gc::allocator::RosAlloc::RunFreeListOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_HEAD_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListHeadOffset())
+DEFINE_ROSALLOC_CONSTANT(RUN_FREE_LIST_SIZE_OFFSET,     int32_t, art::gc::allocator::RosAlloc::RunFreeListSizeOffset())
+DEFINE_ROSALLOC_CONSTANT(SLOT_NEXT_OFFSET,              int32_t, art::gc::allocator::RosAlloc::RunSlotNextOffset())
+
+
+#undef DEFINE_ROSALLOC_CONSTANT
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
new file mode 100644
index 0000000..af5ca21
--- /dev/null
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Constants within thread.h.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "thread.h"   // art::kSuspendRequest, etc.
+#endif
+
+#define DEFINE_THREAD_CONSTANT(macro_name, type, expr) \
+  DEFINE_EXPR(THREAD_ ## macro_name, type, (expr))
+
+DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST,    int32_t, art::kSuspendRequest)
+DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
+
+#undef DEFINE_THREAD_CONSTANT
diff --git a/tools/cpp-define-generator/generate-asm-support b/tools/cpp-define-generator/generate-asm-support
new file mode 100755
index 0000000..f95648b
--- /dev/null
+++ b/tools/cpp-define-generator/generate-asm-support
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Generates asm_support_gen.h
+# - This must be run after a build since it uses cpp-define-generator-data
+
+[[ -z ${ANDROID_BUILD_TOP+x} ]] && (echo "Run source build/envsetup.sh first" >&2 && exit 1)
+
+cpp-define-generator-datad > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
new file mode 100644
index 0000000..a1b463a
--- /dev/null
+++ b/tools/cpp-define-generator/main.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <type_traits>
+#include <ios>
+#include <algorithm>
+#include <string>
+
+// Art Offset file dependencies
+#define DEFINE_INCLUDE_DEPENDENCIES
+#include "offsets_all.def"
+
+std::string to_upper(std::string input) {
+  std::transform(input.begin(), input.end(), input.begin(), ::toupper);
+  return input;
+}
+
+template <typename T, typename = void>
+typename std::enable_if<!std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print most values as hex.
+  std::stringstream ss;
+  ss << std::showbase << std::hex << value;
+  return ss.str();
+}
+
+template <typename T, typename = void>
+typename std::enable_if<std::is_signed<T>::value, std::string>::type
+pretty_format(T value) {
+  // Print "signed" values as decimal so that the negativity doesn't get lost.
+  std::stringstream ss;
+
+  // For negative values add a (). Omit it from positive values for conciseness.
+  if (value < 0) {
+    ss << "(";
+  }
+
+  ss << value;
+
+  if (value < 0) {
+    ss << ")";
+  }
+  return ss.str();
+}
+
+template <typename T>
+void cpp_define(std::string name, T value) {
+  std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
+}
+
+template <typename T>
+void emit_check_eq(T value, std::string expr) {
+  std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
+}
+
+const char *kFileHeader = /* // NOLINT [readability/multiline_string] [5] */ R"L1C3NS3(
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+#define ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+
+// This file has been auto-generated by cpp-define-generator; do not edit directly.
+)L1C3NS3";  // NOLINT [readability/multiline_string] [5]
+
+const char *kFileFooter = /* // NOLINT [readability/multiline_string] [5] */ R"F00T3R(
+#endif  // ART_RUNTIME_GENERATED_ASM_SUPPORT_GEN_H_
+)F00T3R";  // NOLINT [readability/multiline_string] [5]
+
+#define MACROIZE(holder_type, field_name) to_upper(#holder_type "_" #field_name "_OFFSET")
+
+int main() {
+  std::cout << kFileHeader << std::endl;
+
+  std::string z = "";
+
+  // Print every constant expression to stdout as a #define or a CHECK_EQ
+#define DEFINE_EXPR(macro_name, field_type, expr) \
+  cpp_define(to_upper(#macro_name), static_cast<field_type>(expr)); \
+  emit_check_eq(z + "static_cast<" #field_type ">(" + to_upper(#macro_name) + ")", \
+                "static_cast<" #field_type ">(" #expr ")");
+#include "offsets_all.def"
+
+  std::cout << kFileFooter << std::endl;
+  return 0;
+}
diff --git a/tools/cpp-define-generator/offset_codeitem.def b/tools/cpp-define-generator/offset_codeitem.def
new file mode 100644
index 0000000..e5acd1d
--- /dev/null
+++ b/tools/cpp-define-generator/offset_codeitem.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within CodeItem.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include <cstddef>      // offsetof
+#include "dex_file.h"   // art::DexFile
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_CODEITEM_OFFSET(field_name) \
+  DEFINE_OFFSET_EXPR(CodeItem, field_name, int32_t, offsetof(art::DexFile::CodeItem, field_name ## _))
+
+//                     Field Name
+DEFINE_CODEITEM_OFFSET(insns)
+
+#undef DEFINE_CODEITEM_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def
new file mode 100644
index 0000000..4b9d481
--- /dev/null
+++ b/tools/cpp-define-generator/offset_dexcache.def
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::ArtMethod.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "art_method.h"         // art::ArtMethod
+#include "base/enums.h"         // PointerSize
+#include "mirror/dex_cache.h"   // art::DexCache
+#endif
+
+#define DEFINE_ART_METHOD_OFFSET_SIZED(field_name, method_name) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_32, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k32).Int32Value()) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_64, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k64).Int32Value())
+
+#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \
+  DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET, int32_t, art::ArtMethod::method_name##Offset().Int32Value())
+
+#define DEFINE_DECLARING_CLASS_OFFSET(field_name, method_name) \
+  DEFINE_EXPR(DECLARING_CLASS_ ## field_name ## _OFFSET, int32_t, art::mirror::Class::method_name##Offset().Int32Value())
+
+//                         New macro suffix          Method Name (of the Offset method)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_METHODS,    DexCacheResolvedMethods)
+DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_TYPES,      DexCacheResolvedTypes)
+DEFINE_ART_METHOD_OFFSET_SIZED(JNI,                  EntryPointFromJni)
+DEFINE_ART_METHOD_OFFSET_SIZED(QUICK_CODE,           EntryPointFromQuickCompiledCode)
+DEFINE_ART_METHOD_OFFSET(DECLARING_CLASS,            DeclaringClass)
+DEFINE_DECLARING_CLASS_OFFSET(DEX_CACHE_STRINGS,     DexCacheStrings)
+
+#undef DEFINE_ART_METHOD_OFFSET
+#undef DEFINE_ART_METHOD_OFFSET_SIZED
+#undef DEFINE_DECLARING_CLASS_OFFSET
diff --git a/tools/cpp-define-generator/offset_mirror_object.def b/tools/cpp-define-generator/offset_mirror_object.def
new file mode 100644
index 0000000..9b99634
--- /dev/null
+++ b/tools/cpp-define-generator/offset_mirror_object.def
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within java.lang.Object (mirror::Object).
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "mirror/object.h"         // art::mirror::Object
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_MIRROR_OBJECT_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(MIRROR_OBJECT, field_name, int32_t, art::mirror::Object::method_name##Offset().Int32Value())
+
+//                          New macro suffix            Method Name (of the Offset method)
+DEFINE_MIRROR_OBJECT_OFFSET(CLASS,                      Class)
+DEFINE_MIRROR_OBJECT_OFFSET(LOCK_WORD,                  Monitor)
+
+#undef DEFINE_MIRROR_OBJECT_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
new file mode 100644
index 0000000..17167a0
--- /dev/null
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Runtime.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "runtime.h"         // art::Runtime
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+// Note: these callee-save method loads require read barriers.
+
+#define DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(field_name, constant_name) \
+  DEFINE_OFFSET_EXPR(Runtime, field_name ## _METHOD, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
+
+//                                Macro substring          Constant name
+// Offset of field Runtime::callee_save_methods_[kSaveAllCalleeSaves]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL_CALLEE_SAVES, kSaveAllCalleeSaves)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsOnly]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_ONLY, kSaveRefsOnly)
+// Offset of field Runtime::callee_save_methods_[kSaveRefsAndArgs]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, kSaveRefsAndArgs)
+// Offset of field Runtime::callee_save_methods_[kSaveEverything]
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything)
+
+#undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_shadow_frame.def b/tools/cpp-define-generator/offset_shadow_frame.def
new file mode 100644
index 0000000..b49a340
--- /dev/null
+++ b/tools/cpp-define-generator/offset_shadow_frame.def
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within ShadowFrame.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_SHADOW_FRAME_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(ShadowFrame, field_name, int32_t, art::ShadowFrame::method_name##Offset())
+
+//                         New macro suffix            Method Name (of the Offset method)
+DEFINE_SHADOW_FRAME_OFFSET(LINK,                       Link)
+DEFINE_SHADOW_FRAME_OFFSET(METHOD,                     Method)
+DEFINE_SHADOW_FRAME_OFFSET(RESULT_REGISTER,            ResultRegister)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC_PTR,                 DexPCPtr)
+DEFINE_SHADOW_FRAME_OFFSET(CODE_ITEM,                  CodeItem)
+DEFINE_SHADOW_FRAME_OFFSET(LOCK_COUNT_DATA,            LockCountData)
+DEFINE_SHADOW_FRAME_OFFSET(NUMBER_OF_VREGS,            NumberOfVRegs)
+DEFINE_SHADOW_FRAME_OFFSET(DEX_PC,                     DexPC)
+DEFINE_SHADOW_FRAME_OFFSET(CACHED_HOTNESS_COUNTDOWN,   CachedHotnessCountdown)
+DEFINE_SHADOW_FRAME_OFFSET(VREGS,                      VRegs)
+
+#undef DEFINE_SHADOW_FRAME_OFFSET
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offset_thread.def b/tools/cpp-define-generator/offset_thread.def
new file mode 100644
index 0000000..6f94d38
--- /dev/null
+++ b/tools/cpp-define-generator/offset_thread.def
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Offsets within art::Thread.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "base/enums.h"    // PointerSize
+#include "stack.h"         // art::ShadowFrame
+#endif
+
+#include "common.def"        // DEFINE_OFFSET_EXPR
+
+#define DEFINE_THREAD_OFFSET(field_name, method_name) \
+  DEFINE_OFFSET_EXPR(Thread, field_name, int32_t, art::Thread::method_name##Offset<art::kRuntimePointerSize>().Int32Value())
+
+//                   New macro suffix            Method Name (of the Offset method)
+DEFINE_THREAD_OFFSET(FLAGS,                      ThreadFlags)
+DEFINE_THREAD_OFFSET(ID,                         ThinLockId)
+DEFINE_THREAD_OFFSET(IS_GC_MARKING,              IsGcMarking)
+DEFINE_THREAD_OFFSET(CARD_TABLE,                 CardTable)
+
+// TODO: The rest of the offsets
+// are dependent on __SIZEOF_POINTER__
+
+#undef DEFINE_THREAD_OFFSET
+
+#include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
new file mode 100644
index 0000000..13371a1
--- /dev/null
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Includes every single offset file in art.
+// Useful for processing every single offset together.
+
+// Usage:
+// #define DEFINE_INCLUDE_DEPENDENCIES
+// #include "offsets_all.def"
+// to automatically include each def file's header dependencies.
+//
+// Afterwards,
+// #define DEFINE_EXPR(define_name, field_type, expr) ...
+// #include "offsets_all.def"
+// to process each offset however one wants.
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#define DEFINE_EXPR(define_name, field_type, expr)
+#endif
+
+#if !defined(DEFINE_EXPR)
+#error "Either DEFINE_INCLUDE_DEPENDENCIES or DEFINE_EXPR must be defined"
+#endif
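+
+// Illustrative sketch only (not used by any build rule): after the
+// DEFINE_INCLUDE_DEPENDENCIES pass described above, a consumer could, inside a
+// function body, print every offset along these lines:
+//
+//   #define DEFINE_EXPR(define_name, field_type, expr) \
+//     std::cout << #define_name << " = " << static_cast<field_type>(expr) << "\n";
+//   #include "offsets_all.def"
+//   #undef DEFINE_EXPR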
+
+#include "constant_reference.def"
+#include "offset_runtime.def"
+// TODO: rest of THREAD_ offsets (depends on __SIZEOF_POINTER__).
+#include "offset_thread.def"
+// TODO: SHADOW_FRAME depends on __SIZEOF_POINTER__
+// #include "offset_shadow_frame.def"
+#include "offset_codeitem.def"
+// TODO: MIRROR_OBJECT_HEADER_SIZE (depends on #ifdef read barrier)
+// TODO: MIRROR_CLASS offsets (see above)
+#include "offset_mirror_object.def"
+#include "constant_class.def"
+// TODO: MIRROR_*_ARRAY offsets (depends on header size)
+// TODO: MIRROR_STRING offsets (depends on header size)
+#include "offset_dexcache.def"
+#include "constant_dexcache.def"
+#include "constant_heap.def"
+#include "constant_lockword.def"
+#include "constant_globals.def"
+#include "constant_rosalloc.def"
+#include "constant_thread.def"
+#include "constant_jit.def"
+
+// TODO: MIRROR_OBJECT_HEADER_SIZE #ifdef depends on read barriers
+// TODO: Array offsets (depends on MIRROR_OBJECT_HEADER_SIZE)
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#undef DEFINE_EXPR
+#undef DEFINE_INCLUDE_DEPENDENCIES
+#endif
+
+
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Device.java b/tools/dexfuzz/src/dexfuzz/executors/Device.java
index 4a53957..45538fe 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Device.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Device.java
@@ -68,7 +68,13 @@
     return envVars.get(key);
   }
 
-  private String getHostCoreImagePath() {
+  private String getHostCoreImagePathWithArch() {
+    // TODO: Using host currently implies x86 (see Options.java), change this when generalized.
+    assert(Options.useArchX86);
+    return androidHostOut + "/framework/x86/core.art";
+  }
+
+  private String getHostCoreImagePathNoArch() {
     return androidHostOut + "/framework/core.art";
   }
 
@@ -80,7 +86,7 @@
     androidHostOut = checkForEnvVar(envVars, "ANDROID_HOST_OUT");
 
     if (Options.executeOnHost) {
-      File coreImage = new File(getHostCoreImagePath());
+      File coreImage = new File(getHostCoreImagePathWithArch());
       if (!coreImage.exists()) {
         Log.errorAndQuit("Host core image not found at " + coreImage.getPath()
             + ". Did you forget to build it?");
@@ -156,7 +162,7 @@
    * Get any extra flags required to execute ART on the host.
    */
   public String getHostExecutionFlags() {
-    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePath());
+    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePathNoArch());
   }
 
   public String getAndroidHostOut() {
diff --git a/tools/dmtracedump/tracedump.cc b/tools/dmtracedump/tracedump.cc
index f70e2c2..3afee6f 100644
--- a/tools/dmtracedump/tracedump.cc
+++ b/tools/dmtracedump/tracedump.cc
@@ -512,10 +512,10 @@
 void freeDataKeys(DataKeys* pKeys) {
   if (pKeys == nullptr) return;
 
-  free(pKeys->fileData);
-  free(pKeys->threads);
-  free(pKeys->methods);
-  free(pKeys);
+  delete[] pKeys->fileData;
+  delete[] pKeys->threads;
+  delete[] pKeys->methods;
+  delete pKeys;
 }
 
 /*
@@ -822,8 +822,8 @@
 DataKeys* parseKeys(FILE* fp, int32_t verbose) {
   int64_t offset;
   DataKeys* pKeys = new DataKeys();
-  memset(pKeys, 0, sizeof(DataKeys));
   if (pKeys == nullptr) return nullptr;
+  memset(pKeys, 0, sizeof(DataKeys));
 
   /*
    * We load the entire file into memory.  We do this, rather than memory-
@@ -865,9 +865,13 @@
     return nullptr;
   }
 
-  /* Reduce our allocation now that we know where the end of the key section is. */
-  pKeys->fileData = reinterpret_cast<char*>(realloc(pKeys->fileData, offset));
-  pKeys->fileLen = offset;
+  /*
+   * Although it is tempting to reduce our allocation now that we know where the
+   * end of the key section is, there is a pitfall. The method names and
+   * signatures in the method list contain pointers into the fileData area.
+   * Realloc or free will result in corruption.
+   */
+
   /* Leave fp pointing to the beginning of the data section. */
   fseek(fp, offset, SEEK_SET);
 
@@ -2607,7 +2611,7 @@
     if (gOptions.graphFileName != nullptr) {
       createInclusiveProfileGraphNew(dataKeys);
     }
-    free(methods);
+    delete[] methods;
   }
 
   freeDataKeys(dataKeys);
diff --git a/test/955-lambda-smali/build b/tools/javafuzz/Android.mk
old mode 100755
new mode 100644
similarity index 63%
copy from test/955-lambda-smali/build
copy to tools/javafuzz/Android.mk
index 14230c2..63db57a
--- a/test/955-lambda-smali/build
+++ b/tools/javafuzz/Android.mk
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# make us exit on a failure
-set -e
+# Java fuzzer tool.
 
-./default-build "$@" --experimental default-methods
+LOCAL_PATH:= $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := javafuzz.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_MODULE := javafuzz
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/tools/javafuzz/README.md b/tools/javafuzz/README.md
new file mode 100644
index 0000000..35c057c
--- /dev/null
+++ b/tools/javafuzz/README.md
@@ -0,0 +1,81 @@
+JavaFuzz
+========
+
+JavaFuzz is a tool for generating random Java programs with the objective
+of fuzz testing the ART infrastructure. Each randomly generated Java program
+can be run under various modes of execution, such as using the interpreter,
+using the optimizing compiler, using an external reference implementation,
+or using various target architectures. Any difference between the outputs
+(**divergence**) may indicate a bug in one of the execution modes.
+
+JavaFuzz can be combined with dexfuzz to get multilayered fuzz testing.
+
+How to run JavaFuzz
+===================
+
+    javafuzz [-s seed] [-d expr-depth] [-l stmt-length]
+             [-i if-nest] [-n loop-nest]
+
+where
+
+    -s : defines a deterministic random seed
+         (randomized using time by default)
+    -d : defines a fuzzing depth for expressions
+         (higher values yield deeper expressions)
+    -l : defines a fuzzing length for statement lists
+         (higher values yield longer statement sequences)
+    -i : defines a fuzzing nest for if/switch statements
+         (higher values yield deeper nested conditionals)
+    -n : defines a fuzzing nest for for/while/do-while loops
+         (higher values yield deeper nested loops)
+
+The current version of JavaFuzz sends all output to stdout and uses
+a fixed test class named Test, so a typical test run looks as follows.
+
+    javafuzz > Test.java
+    jack -cp ${JACK_CLASSPATH} --output-dex . Test.java
+    art -classpath classes.dex Test
+
+How to start the JavaFuzz tests
+===============================
+
+    run_java_fuzz_test.py [--num_tests]
+                          [--mode1=mode] [--mode2=mode]
+
+where
+
+    --num_tests : number of tests to run (10000 by default)
+    --mode1=m1  : execution mode for the first runner
+    --mode2=m2  : execution mode for the second runner
+    with m1 != m2, and each mode one of
+      ri   : reference implementation on host (default for m1)
+      hint : Art interpreter on host
+      hopt : Art optimizing on host (default for m2)
+      tint : Art interpreter on target
+      topt : Art optimizing on target
+
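+For example, to run 100 tests comparing the reference implementation on host
+against the Art optimizing compiler on host (an illustrative invocation; both
+modes shown happen to be the defaults):
+
+    run_java_fuzz_test.py --num_tests=100 --mode1=ri --mode2=hopt
+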
+Background
+==========
+
+Although test suites are extremely useful to validate the correctness of a
+system and to ensure that no regressions occur, any test suite is necessarily
+finite in size and scope. Tests typically focus on validating particular
+features by means of code sequences most programmers would expect. Regression
+tests often use slightly less idiomatic code sequences, since they reflect
+problems that were not anticipated originally, but occurred “in the field”.
+Still, any test suite leaves the developer wondering whether undetected bugs
+and flaws still linger in the system.
+
+Over the years, fuzz testing has gained popularity as a testing technique for
+discovering such lingering bugs, including bugs that can bring down a system in
+an unexpected way. Fuzzing refers to feeding a large amount of random data as
+input to a system in an attempt to find bugs or make it crash. Mutation-based
+fuzz testing is a special form of fuzzing that applies small random changes to
+existing inputs in order to detect shortcomings in a system. Profile-guided or
+coverage-guided fuzzing adds a direction to the way these random changes are
+applied. Multilayer approaches generate random inputs that are subsequently
+mutated at various stages of execution.
+
+The randomness of fuzz testing implies that the size and scope of testing are no
+longer bounded. Every new run can potentially discover bugs and crashes that were
+hitherto undetected.
diff --git a/tools/javafuzz/javafuzz.cc b/tools/javafuzz/javafuzz.cc
new file mode 100644
index 0000000..4e6e978
--- /dev/null
+++ b/tools/javafuzz/javafuzz.cc
@@ -0,0 +1,1072 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <random>
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+
+namespace {
+
+/*
+ * Java operators.
+ */
+
+#define EMIT(x) fputs((x)[random0(sizeof(x)/sizeof(const char*))], out_);
+
+static constexpr const char* kIncDecOps[]   = { "++", "--" };
+static constexpr const char* kIntUnaryOps[] = { "+", "-", "~" };
+static constexpr const char* kFpUnaryOps[]  = { "+", "-" };
+
+static constexpr const char* kBoolBinOps[] = { "&&", "||", "&", "|", "^" };  // few less common
+static constexpr const char* kIntBinOps[]  = { "+", "-", "*", "/", "%",
+                                               ">>", ">>>", "<<", "&", "|", "^" };
+static constexpr const char* kFpBinOps[]   = { "+", "-", "*", "/" };
+
+static constexpr const char* kBoolAssignOps[] = { "=", "&=" , "|=", "^=" };  // few less common
+static constexpr const char* kIntAssignOps[]  = { "=", "+=", "-=", "*=", "/=", "%=",
+                                                  ">>=", ">>>=", "<<=", "&=", "|=", "^=" };
+static constexpr const char* kFpAssignOps[]   = { "=", "+=", "-=", "*=", "/=" };
+
+static constexpr const char* kBoolRelOps[] = { "==", "!=" };
+static constexpr const char* kRelOps[]     = { "==", "!=", ">", ">=", "<", "<=" };
+
+/*
+ * Version of JavaFuzz. Increase this each time changes are made to the program
+ * to preserve the property that a given version of JavaFuzz yields the same
+ * fuzzed Java program for a deterministic random seed.
+ */
+const char* VERSION = "1.0";
+
+/**
+ * A class that generates a random Java program that compiles correctly. The program
+ * is generated using rules that generate various programming constructs. Each rule
+ * has a fixed probability to "fire". Running a generated program yields deterministic
+ * output, making it well suited to testing various modes of execution (e.g. an
+ * interpreter vs. a compiler, or two different runtimes) for divergences.
+ *
+ * TODO: Due to the original scope of this project, the generated Java program is heavy
+ *       on loops, arrays, and basic operations; fuzzing other aspects of Java programs,
+ *       like elaborate typing, class hierarchies, and interfaces is still TBD.
+ */
+class JavaFuzz {
+ public:
+  JavaFuzz(FILE* out,
+           uint32_t seed,
+           uint32_t expr_depth,
+           uint32_t stmt_length,
+           uint32_t if_nest,
+           uint32_t loop_nest)
+      : out_(out),
+        fuzz_random_engine_(seed),
+        fuzz_seed_(seed),
+        fuzz_expr_depth_(expr_depth),
+        fuzz_stmt_length_(stmt_length),
+        fuzz_if_nest_(if_nest),
+        fuzz_loop_nest_(loop_nest),
+        return_type_(randomType()),
+        array_type_(randomType()),
+        array_dim_(random1(3)),
+        array_size_(random1(10)),
+        indentation_(0),
+        expr_depth_(0),
+        stmt_length_(0),
+        if_nest_(0),
+        loop_nest_(0),
+        switch_nest_(0),
+        do_nest_(0),
+        boolean_local_(0),
+        int_local_(0),
+        long_local_(0),
+        float_local_(0),
+        double_local_(0) { }
+
+  ~JavaFuzz() { }
+
+  void emitProgram() {
+    emitHeader();
+    emitTestClassWithMain();
+  }
+
+ private:
+  //
+  // Types.
+  //
+
+  // Current type of each expression during generation.
+  enum Type {
+    kBoolean,
+    kInt,
+    kLong,
+    kFloat,
+    kDouble
+  };
+
+  // Test for an integral type.
+  static bool isInteger(Type tp) {
+    return tp == kInt || tp == kLong;
+  }
+
+  // Test for a floating-point type.
+  static bool isFP(Type tp) {
+    return tp == kFloat || tp == kDouble;
+  }
+
+  // Emit type.
+  void emitType(Type tp) const {
+    switch (tp) {
+      case kBoolean: fputs("boolean", out_); break;
+      case kInt:     fputs("int",     out_); break;
+      case kLong:    fputs("long",    out_); break;
+      case kFloat:   fputs("float",   out_); break;
+      case kDouble:  fputs("double",  out_); break;
+    }
+  }
+
+  // Emit type class.
+  void emitTypeClass(Type tp) const {
+    switch (tp) {
+      case kBoolean: fputs("Boolean", out_); break;
+      case kInt:     fputs("Integer", out_); break;
+      case kLong:    fputs("Long",    out_); break;
+      case kFloat:   fputs("Float",   out_); break;
+      case kDouble:  fputs("Double",  out_); break;
+    }
+  }
+
+  // Return a random type.
+  Type randomType() {
+    switch (random1(5)) {
+      case 1:  return kBoolean;
+      case 2:  return kInt;
+      case 3:  return kLong;
+      case 4:  return kFloat;
+      default: return kDouble;
+    }
+  }
+
+  //
+  // Expressions.
+  //
+
+  // Emit a unary operator (same type in-out).
+  void emitUnaryOp(Type tp) {
+    if (tp == kBoolean) {
+      fputs("!", out_);
+    } else if (isInteger(tp)) {
+      EMIT(kIntUnaryOps);
+    } else {  // isFP(tp)
+      EMIT(kFpUnaryOps);
+    }
+  }
+
+  // Emit a pre/post-increment/decrement operator (same type in-out).
+  void emitIncDecOp(Type tp) {
+    if (tp == kBoolean) {
+      // Not applicable, just leave "as is".
+    } else {  // isInteger(tp) || isFP(tp)
+      EMIT(kIncDecOps);
+    }
+  }
+
+  // Emit a binary operator (same type in-out).
+  void emitBinaryOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolBinOps);
+    } else if (isInteger(tp)) {
+      EMIT(kIntBinOps);
+    } else {  // isFP(tp)
+      EMIT(kFpBinOps);
+    }
+  }
+
+  // Emit an assignment operator (same type in-out).
+  void emitAssignmentOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolAssignOps);
+    } else if (isInteger(tp)) {
+      EMIT(kIntAssignOps);
+    } else {  // isFP(tp)
+      EMIT(kFpAssignOps);
+    }
+  }
+
+  // Emit a relational operator (one type in, boolean out).
+  void emitRelationalOp(Type tp) {
+    if (tp == kBoolean) {
+      EMIT(kBoolRelOps);
+    } else {  // isInteger(tp) || isFP(tp)
+      EMIT(kRelOps);
+    }
+  }
+
+  // Emit a type conversion operator sequence (out type given, new suitable in type picked).
+  Type emitTypeConversionOp(Type tp) {
+    if (tp == kInt) {
+      switch (random1(5)) {
+        case 1: fputs("(int)", out_); return kLong;
+        case 2: fputs("(int)", out_); return kFloat;
+        case 3: fputs("(int)", out_); return kDouble;
+        // Narrowing-widening.
+        case 4: fputs("(int)(byte)(int)",  out_); return kInt;
+        case 5: fputs("(int)(short)(int)", out_); return kInt;
+      }
+    } else if (tp == kLong) {
+      switch (random1(6)) {
+        case 1: /* implicit */         return kInt;
+        case 2: fputs("(long)", out_); return kFloat;
+        case 3: fputs("(long)", out_); return kDouble;
+        // Narrowing-widening.
+        case 4: fputs("(long)(byte)(long)",  out_); return kLong;
+        case 5: fputs("(long)(short)(long)", out_); return kLong;
+        case 6: fputs("(long)(int)(long)",   out_); return kLong;
+      }
+    } else if (tp == kFloat) {
+      switch (random1(3)) {
+        case 1: fputs("(float)", out_); return kInt;
+        case 2: fputs("(float)", out_); return kLong;
+        case 3: fputs("(float)", out_); return kDouble;
+      }
+    } else if (tp == kDouble) {
+      switch (random1(3)) {
+        case 1: fputs("(double)", out_); return kInt;
+        case 2: fputs("(double)", out_); return kLong;
+        case 3: fputs("(double)", out_); return kFloat;
+      }
+    }
+    return tp;  // nothing suitable, just keep type
+  }
+
+  // Emit a type conversion (out type given, new suitable in type picked).
+  void emitTypeConversion(Type tp) {
+    if (tp == kBoolean) {
+      Type tp = randomType();
+      emitExpression(tp);
+      fputc(' ', out_);
+      emitRelationalOp(tp);
+      fputc(' ', out_);
+      emitExpression(tp);
+    } else {
+      tp = emitTypeConversionOp(tp);
+      fputc(' ', out_);
+      emitExpression(tp);
+    }
+  }
+
+  // Emit a unary intrinsic (out type given, new suitable in type picked).
+  Type emitIntrinsic1(Type tp) {
+    if (tp == kBoolean) {
+      switch (random1(4)) {
+        case 1: fputs("Float.isNaN",       out_); return kFloat;
+        case 2: fputs("Float.isInfinite",  out_); return kFloat;
+        case 3: fputs("Double.isNaN",      out_); return kDouble;
+        case 4: fputs("Double.isInfinite", out_); return kDouble;
+      }
+    } else if (isInteger(tp)) {
+      const char* prefix = tp == kLong ? "Long" : "Integer";
+      switch (random1(9)) {
+        case 1: fprintf(out_, "%s.highestOneBit",         prefix); break;
+        case 2: fprintf(out_, "%s.lowestOneBit",          prefix); break;
+        case 3: fprintf(out_, "%s.numberOfLeadingZeros",  prefix); break;
+        case 4: fprintf(out_, "%s.numberOfTrailingZeros", prefix); break;
+        case 5: fprintf(out_, "%s.bitCount",              prefix); break;
+        case 6: fprintf(out_, "%s.signum",                prefix); break;
+        case 7: fprintf(out_, "%s.reverse",               prefix); break;
+        case 8: fprintf(out_, "%s.reverseBytes",          prefix); break;
+        case 9: fputs("Math.abs", out_);                           break;
+      }
+    } else {  // isFP(tp)
+      switch (random1(5)) {
+        case 1: fputs("Math.abs",      out_); break;
+        case 2: fputs("Math.ulp",      out_); break;
+        case 3: fputs("Math.signum",   out_); break;
+        case 4: fputs("Math.nextUp",   out_); break;
+        case 5: fputs("Math.nextDown", out_); break;
+      }
+    }
+    return tp;  // same type in-out
+  }
+
+  // Emit a binary intrinsic (out type given, new suitable in type picked).
+  Type emitIntrinsic2(Type tp) {
+    if (tp == kBoolean) {
+      switch (random1(3)) {
+        case 1: fputs("Boolean.logicalAnd", out_); break;
+        case 2: fputs("Boolean.logicalOr",  out_); break;
+        case 3: fputs("Boolean.logicalXor", out_); break;
+      }
+    } else if (isInteger(tp)) {
+      const char* prefix = tp == kLong ? "Long" : "Integer";
+      switch (random1(3)) {
+        case 1: fprintf(out_, "%s.compare", prefix); break;
+        case 2: fputs("Math.min", out_); break;
+        case 3: fputs("Math.max", out_); break;
+      }
+    } else {  // isFP(tp)
+      switch (random1(2)) {
+        case 1: fputs("Math.min", out_); break;
+        case 2: fputs("Math.max", out_); break;
+      }
+    }
+    return tp;  // same type in-out
+  }
+
+  // Emit an intrinsic (out type given, new suitable in type picked).
+  void emitIntrinsic(Type tp) {
+    if (random1(2) == 1) {
+      tp = emitIntrinsic1(tp);
+      fputc('(', out_);
+      emitExpression(tp);
+      fputc(')', out_);
+    } else {
+      tp = emitIntrinsic2(tp);
+      fputc('(', out_);
+      emitExpression(tp);
+      fputs(", ", out_);
+      emitExpression(tp);
+      fputc(')', out_);
+    }
+  }
+
+  // Emit the unboxing of a freshly boxed value.
+  void emitUnbox(Type tp) {
+    fputc('(', out_);
+    emitType(tp);
+    fputs(") new ", out_);
+    emitTypeClass(tp);
+    fputc('(', out_);
+    emitExpression(tp);
+    fputc(')', out_);
+  }
+
+  // Emit miscellaneous constructs.
+  void emitMisc(Type tp) {
+    switch (tp) {
+      case kBoolean: fputs("this instanceof Test", out_); break;
+      case kInt:     fputs("mArray.length",    out_); break;
+      case kLong:    fputs("Long.MAX_VALUE",   out_); break;
+      case kFloat:   fputs("Float.MAX_VALUE",  out_); break;
+      case kDouble:  fputs("Double.MAX_VALUE", out_); break;
+    }
+  }
+
+  // Adjust local of given type and return adjusted value.
+  uint32_t adjustLocal(Type tp, int32_t a) {
+    switch (tp) {
+      case kBoolean: boolean_local_ += a; return boolean_local_;
+      case kInt:     int_local_     += a; return int_local_;
+      case kLong:    long_local_    += a; return long_local_;
+      case kFloat:   float_local_   += a; return float_local_;
+      default:       double_local_  += a; return double_local_;
+    }
+  }
+
+  // Emit an expression that is a strict upper bound for an array index.
+  void emitUpperBound() {
+    if (random1(8) == 1) {
+      fputs("mArray.length", out_);
+    } else if (random1(8) == 1) {
+      fprintf(out_, "%u", random1(array_size_));  // random in range
+    } else {
+      fprintf(out_, "%u", array_size_);
+    }
+  }
+
+  // Emit an array index, usually within proper range.
+  void emitArrayIndex() {
+    if (loop_nest_ > 0 && random1(2) == 1) {
+      fprintf(out_, "i%u", random0(loop_nest_));
+    } else if (random1(8) == 1) {
+      fputs("mArray.length - 1", out_);
+    } else {
+      fprintf(out_, "%u", random0(array_size_));  // random in range
+    }
+    // Introduce potential off-by-one errors with low probability.
+    if (random1(100) == 1) {
+      if (random1(2) == 1) {
+        fputs(" - 1", out_);
+      } else {
+        fputs(" + 1", out_);
+      }
+    }
+  }
+
+  // Emit a literal.
+  void emitLiteral(Type tp) {
+    switch (tp) {
+      case kBoolean: fputs(random1(2) == 1 ? "true" : "false", out_); break;
+      case kInt:     fprintf(out_, "%d",    random0(100)); break;
+      case kLong:    fprintf(out_, "%dL",   random0(100)); break;
+      case kFloat:   fprintf(out_, "%d.0f", random0(100)); break;
+      case kDouble:  fprintf(out_, "%d.0",  random0(100)); break;
+    }
+  }
+
+  // Emit array variable, if available.
+  bool emitArrayVariable(Type tp) {
+    if (tp == array_type_) {
+      fputs("mArray", out_);
+      for (uint32_t i = 0; i < array_dim_; i++) {
+        fputc('[', out_);
+        emitArrayIndex();
+        fputc(']', out_);
+      }
+      return true;
+    }
+    return false;
+  }
+
+  // Emit a loop variable, if available.
+  bool emitLoopVariable(Type tp) {
+    if (tp == kInt) {
+      if (loop_nest_ > 0) {
+        fprintf(out_, "i%u", random0(loop_nest_));
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Emit a local variable, if available.
+  bool emitLocalVariable(Type tp) {
+    uint32_t locals = adjustLocal(tp, 0);
+    if (locals > 0) {
+      uint32_t local = random0(locals);
+      switch (tp) {
+        case kBoolean: fprintf(out_, "lZ%u", local); break;
+        case kInt:     fprintf(out_, "lI%u", local); break;
+        case kLong:    fprintf(out_, "lJ%u", local); break;
+        case kFloat:   fprintf(out_, "lF%u", local); break;
+        case kDouble:  fprintf(out_, "lD%u", local); break;
+      }
+      return true;
+    }
+    return false;
+  }
+
+  // Emit a field variable.
+  void emitFieldVariable(Type tp) {
+    switch (tp) {
+      case kBoolean: fputs("mZ", out_); break;
+      case kInt:    fputs("mI", out_); break;
+      case kLong:   fputs("mJ", out_); break;
+      case kFloat:  fputs("mF", out_); break;
+      case kDouble: fputs("mD", out_); break;
+    }
+  }
+
+  // Emit a variable.
+  void emitVariable(Type tp) {
+    switch (random1(4)) {
+      case 1:
+        if (emitArrayVariable(tp))
+          return;
+        // FALL-THROUGH
+      case 2:
+        if (emitLocalVariable(tp))
+          return;
+        // FALL-THROUGH
+      case 3:
+        if (emitLoopVariable(tp))
+          return;
+        // FALL-THROUGH
+      default:
+        emitFieldVariable(tp);
+        break;
+    }
+  }
+
+  // Emit an expression.
+  void emitExpression(Type tp) {
+    // Continuing expression becomes less likely as the depth grows.
+    if (random1(expr_depth_ + 1) > fuzz_expr_depth_) {
+      if (random1(2) == 1) {
+        emitLiteral(tp);
+      } else {
+        emitVariable(tp);
+      }
+      return;
+    }
+
+    expr_depth_++;
+
+    fputc('(', out_);
+    switch (random1(12)) {  // favor binary operations
+      case 1:
+        // Unary operator: ~x
+        emitUnaryOp(tp);
+        emitExpression(tp);
+        break;
+      case 2:
+        // Pre-increment: ++x
+        emitIncDecOp(tp);
+        emitVariable(tp);
+        break;
+      case 3:
+        // Post-increment: x++
+        emitVariable(tp);
+        emitIncDecOp(tp);
+        break;
+      case 4:
+        // Ternary operator: b ? x : y
+        emitExpression(kBoolean);
+        fputs(" ? ", out_);
+        emitExpression(tp);
+        fputs(" : ", out_);
+        emitExpression(tp);
+        break;
+      case 5:
+        // Type conversion: (float) x
+        emitTypeConversion(tp);
+        break;
+      case 6:
+        // Intrinsic: foo(x)
+        emitIntrinsic(tp);
+        break;
+      case 7:
+        // Emit unboxing boxed value: (int) Integer(x)
+        emitUnbox(tp);
+        break;
+      case 8:
+        // Miscellaneous constructs: a.length
+        emitMisc(tp);
+        break;
+      default:
+        // Binary operator: x + y
+        emitExpression(tp);
+        fputc(' ', out_);
+        emitBinaryOp(tp);
+        fputc(' ', out_);
+        emitExpression(tp);
+        break;
+    }
+    fputc(')', out_);
+
+    --expr_depth_;
+  }
+
+  //
+  // Statements.
+  //
+
+  // Emit current indentation.
+  void emitIndentation() const {
+    for (uint32_t i = 0; i < indentation_; i++) {
+      fputc(' ', out_);
+    }
+  }
+
+  // Emit a return statement.
+  bool emitReturn(bool mustEmit) {
+    // Only emit when we must, or with low probability inside ifs/loops,
+    // but outside do-while to avoid confusing the may follow status.
+    if (mustEmit || ((if_nest_ + loop_nest_) > 0 && do_nest_ == 0 && random1(10) == 1)) {
+      fputs("return ", out_);
+      emitExpression(return_type_);
+      fputs(";\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a continue statement.
+  bool emitContinue() {
+    // Only emit with low probability inside loops.
+    if (loop_nest_ > 0 && random1(10) == 1) {
+      fputs("continue;\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a break statement.
+  bool emitBreak() {
+    // Only emit with low probability inside loops, but outside switches
+    // to avoid confusing the may follow status.
+    if (loop_nest_ > 0 && switch_nest_ == 0 && random1(10) == 1) {
+      fputs("break;\n", out_);
+      return false;
+    }
+    // Fall back to assignment.
+    return emitAssignment();
+  }
+
+  // Emit a new scope with a local variable declaration statement.
+  bool emitScope() {
+    Type tp = randomType();
+    fputs("{\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    emitType(tp);
+    switch (tp) {
+      case kBoolean: fprintf(out_, " lZ%u = ", boolean_local_); break;
+      case kInt:     fprintf(out_, " lI%u = ", int_local_);     break;
+      case kLong:    fprintf(out_, " lJ%u = ", long_local_);    break;
+      case kFloat:   fprintf(out_, " lF%u = ", float_local_);   break;
+      case kDouble:  fprintf(out_, " lD%u = ", double_local_);  break;
+    }
+    emitExpression(tp);
+    fputs(";\n", out_);
+
+    adjustLocal(tp, 1);  // local now visible
+
+    bool mayFollow = emitStatementList();
+
+    adjustLocal(tp, -1);  // local no longer visible
+
+    indentation_ -= 2;
+    emitIndentation();
+    fputs("}\n", out_);
+    return mayFollow;
+  }
+
+  // Emit a for loop.
+  bool emitForLoop() {
+    // Continuing loop nest becomes less likely as the depth grows.
+    if (random1(loop_nest_ + 1) > fuzz_loop_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    bool goesUp = random1(2) == 1;
+    fprintf(out_, "for (int i%u = ", loop_nest_);
+    if (goesUp) {
+      fprintf(out_, "0; i%u < ", loop_nest_);
+      emitUpperBound();
+      fprintf(out_, "; i%u++) {\n", loop_nest_);
+    } else {
+      emitUpperBound();
+      fprintf(out_, " - 1; i%d >= 0", loop_nest_);
+      fprintf(out_, "; i%d--) {\n", loop_nest_);
+    }
+
+    ++loop_nest_;  // now in loop
+
+    indentation_ += 2;
+    emitStatementList();
+
+    --loop_nest_;  // no longer in loop
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return true;  // loop-body does not block flow
+  }
+
+  // Emit while or do-while loop.
+  bool emitDoLoop() {
+    // Continuing loop nest becomes less likely as the depth grows.
+    if (random1(loop_nest_ + 1) > fuzz_loop_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    // TODO: remove this
+    // The jack bug b/28862040 prevents generating while/do-while loops because otherwise
+    // we get dozens of reports on the same issue per nightly run.
+    if (true) {
+      return emitAssignment();
+    }
+
+    bool isWhile = random1(2) == 1;
+    fputs("{\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    fprintf(out_, "int i%u = %d;", loop_nest_, isWhile ? -1 : 0);
+    emitIndentation();
+    if (isWhile) {
+      fprintf(out_, "while (++i%u < ", loop_nest_);
+      emitUpperBound();
+      fputs(") {\n", out_);
+    } else {
+      fputs("do {\n", out_);
+      do_nest_++;
+    }
+
+    ++loop_nest_;  // now in loop
+
+    indentation_ += 2;
+    emitStatementList();
+
+    --loop_nest_;  // no longer in loop
+
+    indentation_ -= 2;
+    emitIndentation();
+    if (isWhile) {
+      fputs("}\n", out_);
+    } else {
+      fprintf(out_, "} while (++i%u < ", loop_nest_);
+      emitUpperBound();
+      fputs(");\n", out_);
+      --do_nest_;
+    }
+    indentation_ -= 2;
+    emitIndentation();
+    fputs("}\n", out_);
+    return true;  // loop-body does not block flow
+  }
+
+  // Emit an if statement.
+  bool emitIfStmt() {
+    // Continuing if nest becomes less likely as the depth grows.
+    if (random1(if_nest_ + 1) > fuzz_if_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    fputs("if (", out_);
+    emitExpression(kBoolean);
+    fputs(") {\n", out_);
+
+    ++if_nest_;  // now in if
+
+    indentation_ += 2;
+    bool mayFollowTrue = emitStatementList();
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "} else {\n");
+    indentation_ += 2;
+    bool mayFollowFalse = emitStatementList();
+
+    --if_nest_;  // no longer in if
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return mayFollowTrue || mayFollowFalse;
+  }
+
+  // Emit a switch statement.
+  bool emitSwitch() {
+    // Continuing if nest becomes less likely as the depth grows.
+    if (random1(if_nest_ + 1) > fuzz_if_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    bool mayFollow = false;
+    fputs("switch (", out_);
+    emitExpression(kInt);
+    fputs(") {\n", out_);
+
+    ++if_nest_;
+    ++switch_nest_;  // now in switch
+
+    indentation_ += 2;
+    for (uint32_t i = 0; i < 2; i++) {
+      emitIndentation();
+      if (i == 0) {
+        fprintf(out_, "case %d: {\n", random0(100));
+      } else {
+        fprintf(out_, "default: {\n");
+      }
+      indentation_ += 2;
+      if (emitStatementList()) {
+        // Must end with break.
+        emitIndentation();
+        fputs("break;\n", out_);
+        mayFollow = true;
+      }
+      indentation_ -= 2;
+      emitIndentation();
+      fputs("}\n", out_);
+    }
+
+    --if_nest_;
+    --switch_nest_;  // no longer in switch
+
+    indentation_ -= 2;
+    emitIndentation();
+    fprintf(out_, "}\n");
+    return mayFollow;
+  }
+
+  // Emit an assignment statement.
+  bool emitAssignment() {
+    Type tp = randomType();
+    emitVariable(tp);
+    fputc(' ', out_);
+    emitAssignmentOp(tp);
+    fputc(' ', out_);
+    emitExpression(tp);
+    fputs(";\n", out_);
+    return true;
+  }
+
+  // Emit a single statement. Returns true if statements may follow.
+  bool emitStatement() {
+    switch (random1(16)) {  // favor assignments
+      case 1:  return emitReturn(false); break;
+      case 2:  return emitContinue();    break;
+      case 3:  return emitBreak();       break;
+      case 4:  return emitScope();       break;
+      case 5:  return emitForLoop();     break;
+      case 6:  return emitDoLoop();      break;
+      case 7:  return emitIfStmt();      break;
+      case 8:  return emitSwitch();      break;
+      default: return emitAssignment();  break;
+    }
+  }
+
+  // Emit a statement list. Returns true if statements may follow.
+  bool emitStatementList() {
+    while (stmt_length_ < 1000) {  // avoid run-away
+      stmt_length_++;
+      emitIndentation();
+      if (!emitStatement()) {
+        return false;  // rest would be dead code
+      }
+      // Continuing this list becomes less likely as the total statement list grows.
+      if (random1(stmt_length_) > fuzz_stmt_length_) {
+        break;
+      }
+    }
+    return true;
+  }
+
+  // Emit field declarations.
+  void emitFieldDecls() {
+    fputs("  private boolean mZ = false;\n", out_);
+    fputs("  private int     mI = 0;\n", out_);
+    fputs("  private long    mJ = 0;\n", out_);
+    fputs("  private float   mF = 0;\n", out_);
+    fputs("  private double  mD = 0;\n\n", out_);
+  }
+
+  // Emit array declaration.
+  void emitArrayDecl() {
+    fputs("  private ", out_);
+    emitType(array_type_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fputs("[]", out_);
+    }
+    fputs(" mArray = new ", out_);
+    emitType(array_type_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fprintf(out_, "[%d]", array_size_);
+    }
+    fputs(";\n\n", out_);
+  }
+
+  // Emit test constructor.
+  void emitTestConstructor() {
+    fputs("  private Test() {\n", out_);
+    indentation_ += 2;
+    emitIndentation();
+    emitType(array_type_);
+    fputs(" a = ", out_);
+    emitLiteral(array_type_);
+    fputs(";\n", out_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      emitIndentation();
+      fprintf(out_, "for (int i%u = 0; i%u < %u; i%u++) {\n", i, i, array_size_, i);
+      indentation_ += 2;
+    }
+    emitIndentation();
+    fputs("mArray", out_);
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      fprintf(out_, "[i%u]", i);
+    }
+    fputs(" = a;\n", out_);
+    emitIndentation();
+    if (array_type_ == kBoolean) {
+      fputs("a = !a;\n", out_);
+    } else {
+      fputs("a++;\n", out_);
+    }
+    for (uint32_t i = 0; i < array_dim_; i++) {
+      indentation_ -= 2;
+      emitIndentation();
+      fputs("}\n", out_);
+    }
+    indentation_ -= 2;
+    fputs("  }\n\n", out_);
+  }
+
+  // Emit test method.
+  void emitTestMethod() {
+    fputs("  private ", out_);
+    emitType(return_type_);
+    fputs(" testMethod() {\n", out_);
+    indentation_ += 2;
+    if (emitStatementList()) {
+      // Must end with return.
+      emitIndentation();
+      emitReturn(true);
+    }
+    indentation_ -= 2;
+    fputs("  }\n\n", out_);
+  }
+
+  // Emit main method driver.
+  void emitMainMethod() {
+    fputs("  public static void main(String[] args) {\n", out_);
+    indentation_ += 2;
+    fputs("    Test t = new Test();\n    ", out_);
+    emitType(return_type_);
+    fputs(" r = ", out_);
+    emitLiteral(return_type_);
+    fputs(";\n", out_);
+    fputs("    try {\n", out_);
+    fputs("      r = t.testMethod();\n", out_);
+    fputs("    } catch (Exception e) {\n", out_);
+    fputs("      // Arithmetic, null pointer, index out of bounds, etc.\n", out_);
+    fputs("      System.out.println(\"An exception was caught.\");\n", out_);
+    fputs("    }\n", out_);
+    fputs("    System.out.println(\"r  = \" + r);\n",    out_);
+    fputs("    System.out.println(\"mZ = \" + t.mZ);\n", out_);
+    fputs("    System.out.println(\"mI = \" + t.mI);\n", out_);
+    fputs("    System.out.println(\"mJ = \" + t.mJ);\n", out_);
+    fputs("    System.out.println(\"mF = \" + t.mF);\n", out_);
+    fputs("    System.out.println(\"mD = \" + t.mD);\n", out_);
+    fputs("    System.out.println(\"mArray = \" + ", out_);
+    if (array_dim_ == 1) {
+      fputs("Arrays.toString(t.mArray)", out_);
+    } else {
+      fputs("Arrays.deepToString(t.mArray)", out_);
+    }
+    fputs(");\n", out_);
+    indentation_ -= 2;
+    fputs("  }\n", out_);
+  }
+
+  // Emit program header. Emit command line options in the comments.
+  void emitHeader() {
+    fputs("\n/**\n * AOSP Java Fuzz Tester.\n", out_);
+    fputs(" * Automatically generated Java program.\n", out_);
+    fprintf(out_,
+            " * javafuzz -s %u -d %u -l %u -i %u -n %u (version %s)\n */\n\n",
+            fuzz_seed_,
+            fuzz_expr_depth_,
+            fuzz_stmt_length_,
+            fuzz_if_nest_,
+            fuzz_loop_nest_,
+            VERSION);
+    fputs("import java.util.Arrays;\n\n", out_);
+  }
+
+  // Emit single test class with main driver.
+  void emitTestClassWithMain() {
+    fputs("public class Test {\n\n", out_);
+    indentation_ += 2;
+    emitFieldDecls();
+    emitArrayDecl();
+    emitTestConstructor();
+    emitTestMethod();
+    emitMainMethod();
+    indentation_ -= 2;
+    fputs("}\n\n", out_);
+  }
+
+  //
+  // Random integers.
+  //
+
+  // Return random integer in range [0,max).
+  uint32_t random0(uint32_t max) {
+    std::uniform_int_distribution<uint32_t> gen(0, max - 1);
+    return gen(fuzz_random_engine_);
+  }
+
+  // Return random integer in range [1,max].
+  uint32_t random1(uint32_t max) {
+    std::uniform_int_distribution<uint32_t> gen(1, max);
+    return gen(fuzz_random_engine_);
+  }
+
+  // Fuzzing parameters.
+  FILE* out_;
+  std::mt19937 fuzz_random_engine_;
+  const uint32_t fuzz_seed_;
+  const uint32_t fuzz_expr_depth_;
+  const uint32_t fuzz_stmt_length_;
+  const uint32_t fuzz_if_nest_;
+  const uint32_t fuzz_loop_nest_;
+
+  // Return type and array setup.
+  const Type return_type_;
+  const Type array_type_;
+  const uint32_t array_dim_;
+  const uint32_t array_size_;
+
+  // Current context.
+  uint32_t indentation_;
+  uint32_t expr_depth_;
+  uint32_t stmt_length_;
+  uint32_t if_nest_;
+  uint32_t loop_nest_;
+  uint32_t switch_nest_;
+  uint32_t do_nest_;
+  uint32_t boolean_local_;
+  uint32_t int_local_;
+  uint32_t long_local_;
+  uint32_t float_local_;
+  uint32_t double_local_;
+};
+
+}  // anonymous namespace
+
+int32_t main(int32_t argc, char** argv) {
+  // Defaults.
+  uint32_t seed = time(NULL);
+  uint32_t expr_depth = 1;
+  uint32_t stmt_length = 4;
+  uint32_t if_nest = 2;
+  uint32_t loop_nest = 3;
+
+  // Parse options.
+  while (1) {
+    int32_t option = getopt(argc, argv, "s:d:l:i:n:h");
+    if (option < 0) {
+      break;  // done
+    }
+    switch (option) {
+      case 's':
+        seed = strtoul(optarg, nullptr, 0);  // deterministic seed
+        break;
+      case 'd':
+        expr_depth = strtoul(optarg, nullptr, 0);
+        break;
+      case 'l':
+        stmt_length = strtoul(optarg, nullptr, 0);
+        break;
+      case 'i':
+        if_nest = strtoul(optarg, nullptr, 0);
+        break;
+      case 'n':
+        loop_nest = strtoul(optarg, nullptr, 0);
+        break;
+      case 'h':
+      default:
+        fprintf(stderr,
+                "usage: %s [-s seed] "
+                "[-d expr-depth] [-l stmt-length] "
+                "[-i if-nest] [-n loop-nest] [-h]\n",
+                argv[0]);
+        return 1;
+    }
+  }
+
+  // Seed global random generator.
+  srand(seed);
+
+  // Generate fuzzed Java program.
+  JavaFuzz fuzz(stdout, seed, expr_depth, stmt_length, if_nest, loop_nest);
+  fuzz.emitProgram();
+  return 0;
+}
diff --git a/tools/javafuzz/run_java_fuzz_test.py b/tools/javafuzz/run_java_fuzz_test.py
new file mode 100755
index 0000000..4f192e7
--- /dev/null
+++ b/tools/javafuzz/run_java_fuzz_test.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import argparse
+import subprocess
+import sys
+import os
+
+from tempfile import mkdtemp
+from threading import Timer
+
+# Normalized return codes.
+EXIT_SUCCESS = 0
+EXIT_TIMEOUT = 1
+EXIT_NOTCOMPILED = 2
+EXIT_NOTRUN = 3
+
+#
+# Utility methods.
+#
+
+def RunCommand(cmd, args, out, err, timeout = 5):
+  """Executes a command, and returns its return code.
+
+  Args:
+    cmd: string, a command to execute
+    args: string, arguments to pass to command (or None)
+    out: string, file name to open for stdout (or None)
+    err: string, file name to open for stderr (or None)
+    timeout: int, time out in seconds
+  Returns:
+    return code of running command (forced EXIT_TIMEOUT on timeout)
+  """
+  cmd = 'exec ' + cmd  # preserve pid
+  if args != None:
+    cmd = cmd + ' ' + args
+  outf = None
+  if out != None:
+    outf = open(out, mode='w')
+  errf = None
+  if err != None:
+    errf = open(err, mode='w')
+  proc = subprocess.Popen(cmd, stdout=outf, stderr=errf, shell=True)
+  timer = Timer(timeout, proc.kill)  # enforces timeout
+  timer.start()
+  proc.communicate()
+  if timer.is_alive():
+    timer.cancel()
+    returncode = proc.returncode
+  else:
+    returncode = EXIT_TIMEOUT
+  if outf != None:
+    outf.close()
+  if errf != None:
+    errf.close()
+  return returncode
+
+def GetJackClassPath():
+  """Returns Jack's classpath."""
+  top = os.environ.get('ANDROID_BUILD_TOP')
+  if top == None:
+    raise FatalError('Cannot find AOSP build top')
+  libdir = top + '/out/host/common/obj/JAVA_LIBRARIES'
+  return libdir + '/core-libart-hostdex_intermediates/classes.jack:' \
+       + libdir + '/core-oj-hostdex_intermediates/classes.jack'
+
+def GetExecutionModeRunner(mode):
+  """Returns a runner for the given execution mode.
+
+  Args:
+    mode: string, execution mode
+  Returns:
+    TestRunner with given execution mode
+  Raises:
+    FatalError: error for unknown execution mode
+  """
+  if mode == 'ri':
+    return TestRunnerRIOnHost()
+  if mode == 'hint':
+    return TestRunnerArtOnHost(True)
+  if mode == 'hopt':
+    return TestRunnerArtOnHost(False)
+  if mode == 'tint':
+    return TestRunnerArtOnTarget(True)
+  if mode == 'topt':
+    return TestRunnerArtOnTarget(False)
+  raise FatalError('Unknown execution mode')
+
+def GetReturnCode(retc):
+  """Returns a string representation of the given normalized return code.
+  Args:
+    retc: int, normalized return code
+  Returns:
+    string representation of normalized return code
+  Raises:
+    FatalError: error for unknown normalized return code
+  """
+  if retc == EXIT_SUCCESS:
+    return 'SUCCESS'
+  if retc == EXIT_TIMEOUT:
+    return 'TIMED-OUT'
+  if retc == EXIT_NOTCOMPILED:
+    return 'NOT-COMPILED'
+  if retc == EXIT_NOTRUN:
+    return 'NOT-RUN'
+  raise FatalError('Unknown normalized return code')
+
+#
+# Execution mode classes.
+#
+
+class TestRunner(object):
+  """Abstraction for running a test in a particular execution mode."""
+  __metaclass__ = abc.ABCMeta
+
+  def GetDescription(self):
+    """Returns a description string of the execution mode."""
+    return self._description
+
+  def GetId(self):
+    """Returns a short string that uniquely identifies the execution mode."""
+    return self._id
+
+  @abc.abstractmethod
+  def CompileAndRunTest(self):
+    """Compile and run the generated test.
+
+    Ensures that the current Test.java in the temporary directory is compiled
+    and executed under the current execution mode. On success, transfers the
+    generated output to the file GetId()_out.txt in the temporary directory.
+    Cleans up after itself.
+
+    Most nonzero return codes are assumed non-divergent, since systems may
+    exit in different ways. This is enforced by normalizing return codes.
+
+    Returns:
+      normalized return code
+    """
+    pass
+
+class TestRunnerRIOnHost(TestRunner):
+  """Concrete test runner of the reference implementation on host."""
+
+  def  __init__(self):
+    """Constructor for the RI tester."""
+    self._description = 'RI on host'
+    self._id = 'RI'
+
+  def CompileAndRunTest(self):
+    if RunCommand('javac', 'Test.java',
+                  out=None, err=None, timeout=30) == EXIT_SUCCESS:
+      retc = RunCommand('java', 'Test', 'RI_run_out.txt', err=None)
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-f Test.class', out=None, err=None)
+    return retc
+
+class TestRunnerArtOnHost(TestRunner):
+  """Concrete test runner of Art on host (interpreter or optimizing)."""
+
+  def __init__(self, interpreter):
+    """Constructor for the Art on host tester.
+
+    Args:
+      interpreter: boolean, selects between interpreter or optimizing
+    """
+    self._art_args = '-cp classes.dex Test'
+    if interpreter:
+      self._description = 'Art interpreter on host'
+      self._id = 'HInt'
+      self._art_args = '-Xint ' + self._art_args
+    else:
+      self._description = 'Art optimizing on host'
+      self._id = 'HOpt'
+    self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+  def CompileAndRunTest(self):
+    if RunCommand('jack', self._jack_args,
+                  out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+      out = self.GetId() + '_run_out.txt'
+      retc = RunCommand('art', self._art_args, out, 'arterr.txt')
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-rf classes.dex jackerr.txt arterr.txt android-data*',
+               out=None, err=None)
+    return retc
+
+# TODO: very rough first version without proper cache,
+#       reuse staszkiewicz' module for properly setting up dalvikvm on target.
+class TestRunnerArtOnTarget(TestRunner):
+  """Concrete test runner of Art on target (interpreter or optimizing)."""
+
+  def __init__(self, interpreter):
+    """Constructor for the Art on target tester.
+
+    Args:
+      interpreter: boolean, selects between interpreter or optimizing
+    """
+    self._dalvik_args = '-cp /data/local/tmp/classes.dex Test'
+    if interpreter:
+      self._description = 'Art interpreter on target'
+      self._id = 'TInt'
+      self._dalvik_args = '-Xint ' + self._dalvik_args
+    else:
+      self._description = 'Art optimizing on target'
+      self._id = 'TOpt'
+    self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+  def CompileAndRunTest(self):
+    if RunCommand('jack', self._jack_args,
+                  out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+      if RunCommand('adb push', 'classes.dex /data/local/tmp/',
+                    'adb.txt', err=None) != EXIT_SUCCESS:
+        raise FatalError('Cannot push to target device')
+      out = self.GetId() + '_run_out.txt'
+      retc = RunCommand('adb shell dalvikvm', self._dalvik_args, out, err=None)
+      if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+        retc = EXIT_NOTRUN
+    else:
+      retc = EXIT_NOTCOMPILED
+    # Cleanup and return.
+    RunCommand('rm', '-f classes.dex jackerr.txt adb.txt',
+               out=None, err=None)
+    RunCommand('adb shell', 'rm -f /data/local/tmp/classes.dex',
+               out=None, err=None)
+    return retc
+
+#
+# Tester classes.
+#
+
+class FatalError(Exception):
+  """Fatal error in the tester."""
+  pass
+
+class JavaFuzzTester(object):
+  """Tester that runs JavaFuzz many times and report divergences."""
+
+  def __init__(self, num_tests, mode1, mode2):
+    """Constructor for the tester.
+
+    Args:
+      num_tests: int, number of tests to run
+      mode1: string, execution mode for first runner
+      mode2: string, execution mode for second runner
+    """
+    self._num_tests = num_tests
+    self._runner1 = GetExecutionModeRunner(mode1)
+    self._runner2 = GetExecutionModeRunner(mode2)
+    self._save_dir = None
+    self._tmp_dir = None
+    # Statistics.
+    self._test = 0
+    self._num_success = 0
+    self._num_not_compiled = 0
+    self._num_not_run = 0
+    self._num_timed_out = 0
+    self._num_divergences = 0
+
+  def __enter__(self):
+    """On entry, enters new temp directory after saving current directory.
+
+    Raises:
+      FatalError: error when temp directory cannot be constructed
+    """
+    self._save_dir = os.getcwd()
+    self._tmp_dir = mkdtemp(dir="/tmp/")
+    if self._tmp_dir is None:
+      raise FatalError('Cannot obtain temp directory')
+    os.chdir(self._tmp_dir)
+    return self
+
+  def __exit__(self, etype, evalue, etraceback):
+    """On exit, re-enters previously saved current directory and cleans up."""
+    os.chdir(self._save_dir)
+    if self._num_divergences == 0:
+      RunCommand('rm', '-rf ' + self._tmp_dir, out=None, err=None)
+
+  def Run(self):
+    """Runs JavaFuzz many times and report divergences."""
+    print
+    print '**\n**** JavaFuzz Testing\n**'
+    print
+    print '#Tests    :', self._num_tests
+    print 'Directory :', self._tmp_dir
+    print 'Exec-mode1:', self._runner1.GetDescription()
+    print 'Exec-mode2:', self._runner2.GetDescription()
+    print
+    self.ShowStats()
+    for self._test in range(1, self._num_tests + 1):
+      self.RunJavaFuzzTest()
+      self.ShowStats()
+    if self._num_divergences == 0:
+      print '\n\nsuccess (no divergences)\n'
+    else:
+      print '\n\nfailure (divergences)\n'
+
+  def ShowStats(self):
+    """Shows current statistics (on same line) while tester is running."""
+    print '\rTests:', self._test, \
+        'Success:', self._num_success, \
+        'Not-compiled:', self._num_not_compiled, \
+        'Not-run:', self._num_not_run, \
+        'Timed-out:', self._num_timed_out, \
+        'Divergences:', self._num_divergences,
+    sys.stdout.flush()
+
+  def RunJavaFuzzTest(self):
+    """Runs a single JavaFuzz test, comparing two execution modes."""
+    self.ConstructTest()
+    retc1 = self._runner1.CompileAndRunTest()
+    retc2 = self._runner2.CompileAndRunTest()
+    self.CheckForDivergence(retc1, retc2)
+    self.CleanupTest()
+
+  def ConstructTest(self):
+    """Use JavaFuzz to generate next Test.java test.
+
+    Raises:
+      FatalError: error when javafuzz fails
+    """
+    if RunCommand('javafuzz', args=None,
+                  out='Test.java', err=None) != EXIT_SUCCESS:
+      raise FatalError('Unexpected error while running JavaFuzz')
+
+  def CheckForDivergence(self, retc1, retc2):
+    """Checks for divergences and updates statistics.
+
+    Args:
+      retc1: int, normalized return code of first runner
+      retc2: int, normalized return code of second runner
+    """
+    if retc1 == retc2:
+      # Non-divergent in return code.
+      if retc1 == EXIT_SUCCESS:
+        # Both compilations and runs were successful, inspect generated output.
+        args = self._runner1.GetId() + '_run_out.txt ' \
+            + self._runner2.GetId() + '_run_out.txt'
+        if RunCommand('diff', args, out=None, err=None) != EXIT_SUCCESS:
+          self.ReportDivergence('divergence in output')
+        else:
+          self._num_success += 1
+      elif retc1 == EXIT_TIMEOUT:
+        self._num_timed_out += 1
+      elif retc1 == EXIT_NOTCOMPILED:
+        self._num_not_compiled += 1
+      else:
+        self._num_not_run += 1
+    else:
+      # Divergent in return code.
+      self.ReportDivergence('divergence in return code: ' +
+                            GetReturnCode(retc1) + ' vs. ' +
+                            GetReturnCode(retc2))
+
+  def ReportDivergence(self, reason):
+    """Reports and saves a divergence."""
+    self._num_divergences += 1
+    print '\n', self._test, reason
+    # Save.
+    ddir = 'divergence' + str(self._test)
+    RunCommand('mkdir', ddir, out=None, err=None)
+    RunCommand('mv', 'Test.java *.txt ' + ddir, out=None, err=None)
+
+  def CleanupTest(self):
+    """Cleans up after a single test run."""
+    RunCommand('rm', '-f Test.java *.txt', out=None, err=None)
+
+
+def main():
+  # Handle arguments.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--num_tests', default=10000,
+                      type=int, help='number of tests to run')
+  parser.add_argument('--mode1', default='ri',
+                      help='execution mode 1 (default: ri)')
+  parser.add_argument('--mode2', default='hopt',
+                      help='execution mode 2 (default: hopt)')
+  args = parser.parse_args()
+  if args.mode1 == args.mode2:
+    raise FatalError("Identical execution modes given")
+  # Run the JavaFuzz tester.
+  with JavaFuzzTester(args.num_tests, args.mode1, args.mode2) as fuzzer:
+    fuzzer.Run()
+
+if __name__ == "__main__":
+  main()
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index bd5ff18..cbb6e1d 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -36,6 +36,15 @@
   names: ["libcore.io.OsTest#testUnixDomainSockets_in_file_system"]
 },
 {
+  description: "TCP_USER_TIMEOUT is not defined on host's tcp.h (glibc-2.15-4.8).",
+  result: EXEC_FAILED,
+  modes: [host],
+  names: ["libcore.android.system.OsConstantsTest#testTcpUserTimeoutIsDefined",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_setAndGet",
+          "libcore.io.OsTest#test_socket_tcpUserTimeout_doesNotWorkOnDatagramSocket"],
+  bug: 30402085
+},
+{
   description: "Issue with incorrect device time (1970)",
   result: EXEC_FAILED,
   modes: [device],
@@ -171,43 +180,6 @@
   bug: 25437292
 },
 {
-  description: "Failing tests after OpenJDK move.",
-  result: EXEC_FAILED,
-  bug: 26326992,
-  names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST",
-          "libcore.java.lang.OldSystemTest#test_load",
-          "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits",
-          "libcore.java.text.NumberFormatTest#test_setCurrency",
-          "libcore.java.text.OldNumberFormatTest#test_getIntegerInstanceLjava_util_Locale",
-          "libcore.java.util.CalendarTest#testAddOneDayAndOneDayOver30MinuteDstForwardAdds48Hours",
-          "libcore.java.util.CalendarTest#testNewCalendarKoreaIsSelfConsistent",
-          "libcore.java.util.CalendarTest#testSetTimeInZoneWhereDstIsNoLongerUsed",
-          "libcore.java.util.CalendarTest#test_nullLocale",
-          "libcore.java.util.FormatterTest#test_numberLocalization",
-          "libcore.java.util.FormatterTest#test_uppercaseConversions",
-          "libcore.javax.crypto.CipherTest#testCipher_getInstance_WrongType_Failure",
-          "libcore.javax.crypto.CipherTest#testDecryptBufferZeroSize_mustDecodeToEmptyString",
-          "libcore.javax.security.auth.x500.X500PrincipalTest#testExceptionsForWrongDNs",
-          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getDate",
-          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getExpiration",
-          "org.apache.harmony.regex.tests.java.util.regex.PatternSyntaxExceptionTest#testPatternSyntaxException",
-          "org.apache.harmony.tests.java.lang.FloatTest#test_parseFloat_LString_Harmony6261",
-          "org.apache.harmony.tests.java.lang.ThreadTest#test_isDaemon",
-          "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String",
-          "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_W_w_dd_MMMM_yyyy_EEEE",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_dayOfYearPatterns",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_m_z",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_2DigitOffsetFromGMT",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetFromGMT",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetNoGMT",
-          "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0",
-          "libcore.java.util.CalendarTest#test_clear_45877",
-          "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat",
-          "org.apache.harmony.tests.java.util.TimerTaskTest#test_scheduledExecutionTime"]
-},
-{
   description: "Missing resource in classpath",
   result: EXEC_FAILED,
   modes: [device],
@@ -243,50 +215,41 @@
           "org.apache.harmony.tests.java.util.prefs.FilePreferencesImplTest#testPutGet"]
 },
 {
-  description: "libnativehelper_compat_libc++ loading issue",
-  result: EXEC_FAILED,
-  modes: [device],
-  names: ["dalvik.system.JniTest#testGetSuperclass",
-          "dalvik.system.JniTest#testPassingBooleans",
-          "dalvik.system.JniTest#testPassingBytes",
-          "dalvik.system.JniTest#testPassingChars",
-          "dalvik.system.JniTest#testPassingClass",
-          "dalvik.system.JniTest#testPassingDoubles",
-          "dalvik.system.JniTest#testPassingFloats",
-          "dalvik.system.JniTest#testPassingInts",
-          "dalvik.system.JniTest#testPassingLongs",
-          "dalvik.system.JniTest#testPassingObjectReferences",
-          "dalvik.system.JniTest#testPassingShorts",
-          "dalvik.system.JniTest#testPassingThis",
-          "libcore.util.NativeAllocationRegistryTest#testBadSize",
-          "libcore.util.NativeAllocationRegistryTest#testEarlyFree",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndNoSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
-},
-{
-  description: "libnativehelper_compat_libc++.so not found by dlopen on ARM64",
-  result: EXEC_FAILED,
-  modes: [device],
-  bug: 28082914,
-  names: ["libcore.java.lang.ThreadTest#testContextClassLoaderIsInherited",
-          "libcore.java.lang.ThreadTest#testContextClassLoaderIsNotNull",
-          "libcore.java.lang.ThreadTest#testGetAllStackTracesIncludesAllGroups",
-          "libcore.java.lang.ThreadTest#testGetStackTrace",
-          "libcore.java.lang.ThreadTest#testJavaContextClassLoader",
-          "libcore.java.lang.ThreadTest#testLeakingStartedThreads",
-          "libcore.java.lang.ThreadTest#testLeakingUnstartedThreads",
-          "libcore.java.lang.ThreadTest#testNativeThreadNames",
-          "libcore.java.lang.ThreadTest#testThreadInterrupted",
-          "libcore.java.lang.ThreadTest#testThreadSleep",
-          "libcore.java.lang.ThreadTest#testThreadSleepIllegalArguments",
-          "libcore.java.lang.ThreadTest#testThreadWakeup"]
-},
-{
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
+},
+{
+  description: "Made for extending, shouldn't be run",
+  result: EXEC_FAILED,
+  names: ["jsr166.CollectionTest#testEmptyMeansEmpty",
+          "jsr166.Collection8Test#testForEach",
+          "jsr166.Collection8Test#testForEachConcurrentStressTest"]
+},
+{
+  description: "Flaky test",
+  result: EXEC_FAILED,
+  bug: 30107038,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
+},
+{
+  description: "Flaky failure, native crash in the runtime.
+                Unclear if this relates to the tests running sh as a child process.",
+  result: EXEC_FAILED,
+  bug: 30657148,
+  modes: [device],
+  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
+          "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
+},
+{
+  description: "BigInteger failures due to the BoringSSL upgrade",
+  result: EXEC_FAILED,
+  bug: 30917411,
+  names: [
+    "org.apache.harmony.tests.java.math.BigIntegerModPowTest#testModPowNegExp",
+    "org.apache.harmony.tests.java.math.BigIntegerModPowTest#testmodInversePos1",
+    "org.apache.harmony.tests.java.math.BigIntegerTest#test_modInverseLjava_math_BigInteger"
+  ]
 }
 ]
diff --git a/tools/public.libraries.buildbot.txt b/tools/public.libraries.buildbot.txt
new file mode 100644
index 0000000..4b01796
--- /dev/null
+++ b/tools/public.libraries.buildbot.txt
@@ -0,0 +1,8 @@
+libart.so
+libartd.so
+libbacktrace.so
+libc.so
+libc++.so
+libdl.so
+libm.so
+libnativehelper.so
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 572ac00..bdb2d4b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,8 +19,12 @@
   exit 1
 fi
 
+if [ -z "$ANDROID_HOST_OUT" ] ; then
+  ANDROID_HOST_OUT=${OUT_DIR-$ANDROID_BUILD_TOP/out}/host/linux-x86
+fi
+
 # Jar containing all the tests.
-test_jack=${OUT_DIR-out}/host/common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
+test_jack=${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
@@ -28,18 +32,6 @@
   exit 1
 fi
 
-if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
-  # For the moment, skip JDWP tests when read barriers are enabled, as
-  # they sometimes exhibit a deadlock issue with the concurrent
-  # copying collector in the read barrier configuration, between the
-  # HeapTaskDeamon and the JDWP thread (b/25800335).
-  #
-  # TODO: Re-enable the JDWP tests when this deadlock issue is fixed.
-  echo "JDWP tests are temporarily disabled in the read barrier configuration because of"
-  echo "a deadlock issue (b/25800335)."
-  exit 0
-fi
-
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
 args=$@
@@ -56,6 +48,8 @@
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 host="no"
+# Use the JIT compiler by default.
+use_jit=true
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
@@ -74,6 +68,11 @@
   elif [[ $1 == -Ximage:* ]]; then
     image="$1"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    use_jit=false
+    # Remove the --no-jit from the arguments.
+    args=${args/$1}
+    shift
   elif [[ $1 == "--debug" ]]; then
     debug="yes"
     # Remove the --debug from the arguments.
@@ -102,8 +101,12 @@
 if [[ "$image" != "" ]]; then
   vm_args="--vm-arg $image"
 fi
-vm_args="$vm_args --vm-arg -Xusejit:true"
-debuggee_args="$debuggee_args -Xusejit:true"
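+# With the JIT enabled, restrict ahead-of-time compilation to interpret-only so
+# that methods are left for the JIT to compile at runtime.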
+if $use_jit; then
+  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:$use_jit"
+debuggee_args="$debuggee_args -Xusejit:$use_jit"
 if [[ $debug == "yes" ]]; then
   art="$art -d"
   art_debugee="$art_debugee -d"
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index a55af94..2a6e172 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -19,19 +19,29 @@
   exit 1
 fi
 
-# Jar containing jsr166 tests.
-jsr166_test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/classes.jack
-
-# Jar containing all the other tests.
-test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jack
-
-
-if [ ! -f $test_jack ]; then
-  echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar"
-  exit 1
+if [ -z "$ANDROID_PRODUCT_OUT" ] ; then
+  JAVA_LIBRARIES=out/target/common/obj/JAVA_LIBRARIES
+else
+  JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
 fi
 
+function cparg {
+  for var
+  do
+    printf -- "--classpath ${JAVA_LIBRARIES}/${var}_intermediates/classes.jack ";
+  done
+}
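+# For example (illustrative), "cparg core-tests" expands to
+# "--classpath ${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack ".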
+
+DEPS="core-tests jsr166-tests mockito-target"
+
+for lib in $DEPS
+do
+  if [ ! -f "${JAVA_LIBRARIES}/${lib}_intermediates/classes.jack" ]; then
+    echo "${lib} is missing. Before running, you must run art/tools/buildbot-build.sh"
+    exit 1
+  fi
+done
+
 expectations="--expectations art/tools/libcore_failures.txt"
 if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
   # Tolerate some more failures on the concurrent collector configurations.
@@ -43,6 +53,9 @@
   emulator="yes"
 fi
 
+# Use the JIT compiler by default.
+use_jit=true
+
 # Packages that currently work correctly with the expectation files.
 working_packages=("dalvik.system"
                   "libcore.icu"
@@ -91,6 +104,11 @@
     # classpath/resources differences when compiling the boot image.
     vogar_args="$vogar_args --vm-arg -Ximage:/non/existent/vogar.art"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    # Remove the --no-jit from the arguments.
+    vogar_args=${vogar_args/$1}
+    use_jit=false
+    shift
   elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
@@ -111,7 +129,13 @@
 # Use Jack with "1.8" configuration.
 vogar_args="$vogar_args --toolchain jack --language JN"
 
+# JIT settings.
+if $use_jit; then
+  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+fi
+vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}
+vogar $vogar_args $expectations $(cparg $DEPS) ${working_packages[@]}